#' Analyze pairwise alignment
#'
#' Helper for the analyze script. Builds a one-row data frame describing one
#' overlapping ORF of \code{typeName}: start/stop codons, DNA identity, length
#' and gap count over the smorf frame, and amino acid lengths and identities.
#'
#' Three FASTA files are read from \code{outputDirectory/typeName/}:
#' \code{<orfName>_AATranslation_<typeName>_<identifier>.fa},
#' \code{<orfName>_AATranslation_overlap_<typeName>_<identifier>.fa} and
#' \code{<orfName>_orf_aa_<typeName>_<identifier>.fa}. Only the first one is
#' checked for existence; when it is missing a warning is raised and
#' \code{NULL} is returned.
#'
#' @param typeName species name
#' @param outputDirectory folder path for reading input files
#' @param orfName identifier for ORF. SGD name if annotated
#' @param startAA aminoacid start codons calculated by \code{startCodon}.
#'   Element 2 is reported and compared with element 1 (presumably the
#'   reference sequence; confirm against the caller)
#' @param startDNA dna start codons calculated by \code{startCodon}; element 2
#'   is reported
#' @param stopAA aa stop codons calculated by \code{stopCodon}; element 2 is
#'   reported
#' @param subalign dna level subalignment; element 2 is measured
#' @param aa aminoacid translation using subalignment
#' @param df df for reference orf values. The first row of the column
#'   \code{Length of Amino Acid Sequence ORF} is used
#' @param identifier identifier for the overlapping orf name. it will be used
#'   for file names as well.
#' @return a one-row data frame whose column names embed \code{typeName}, or
#'   \code{NULL} (with a warning) when the amino acid translation file does
#'   not exist
analyzePairwise <- function(typeName, outputDirectory, orfName, startAA, startDNA, stopAA, subalign, aa, df, identifier = 'best') {
  rw <- 1 # the result always has exactly one row
  i <- 2 # position of this species within the pairwise inputs

  # Read inputs ####
  inputPath <- function(tag) {
    paste0(outputDirectory, '/', typeName, '/', orfName, tag, typeName, '_', identifier, '.fa')
  }
  AAFileName <- inputPath('_AATranslation_')
  # Skip before doing any work when the translation file is missing.
  if (!file.exists(AAFileName)) {
    warning(paste0('File: ',AAFileName, ' does not exists. Skipping...'))
    return(NULL)
  }
  AA <- readAAStringSet(AAFileName)
  overlap <- readAAStringSet(inputPath('_AATranslation_overlap_'))
  orf <- readAAStringSet(inputPath('_orf_aa_'))

  # Column headers, defined once and in output order.
  cols <- c(
    identifier = paste0('Identifier ', typeName),
    startAA = paste0("What is the Start Codon in ", typeName, "?"),
    startDNA = paste0("What is the Start Codon(DNA) in ", typeName, "?"),
    startAligned = paste0("Does start codon align in ", typeName, "?"),
    stopAA = paste0("What is the Stop Codon in ", typeName, "?"),
    dnaIdentity = paste0(typeName, " % DNA ID over Smorf Frame"),
    dnaLength = paste0("Length of ", typeName, " DNA Sequence over Smorf Frame"),
    gapCount = paste0("Number of ", typeName, " Gaps over Smorf"),
    hasORF = paste0("Is there an ORF in the ", typeName, " Amino Acid Sequence?"),
    sameLength = paste0("Is length of ORF same in ", typeName, "?"),
    aaLengthGaps = paste0("Length of ", typeName, " Amino Acid start to finish with Gaps"),
    aaLengthNoGaps = paste0("Length of ", typeName, " Amino Acid Start to Finish without Gaps"),
    lengthRatio = paste0(typeName, " Length Ratio"),
    aaIdentityOverlap = paste0(typeName, " % Amino Acid over ", typeName, " Overlap"),
    aaIdentityFrame = paste0(typeName, " % Amino Acid over Smorf Frame"),
    aaIdenticalOverlap = paste0(typeName, " Number of identical Amino Acid over ", typeName, " Overlap")
  )
  dataTable <- data.frame(matrix(ncol = length(cols), nrow = 1))
  colnames(dataTable) <- unname(cols)

  dataTable[[cols[["identifier"]]]][rw] <- identifier
  # Reaching this point means the translation file was found and read.
  dataTable[[cols[["hasORF"]]]][rw] <- TRUE

  # Start / stop codons ####
  dataTable[[cols[["startAA"]]]][rw] <- as.character(startAA[[i]])
  dataTable[[cols[["startDNA"]]]][rw] <- as.character(startDNA[[i]])
  dataTable[[cols[["startAligned"]]]][rw] <- (as.character(startAA[[i]]) == as.character(startAA[[1]]))
  dataTable[[cols[["stopAA"]]]][rw] <- as.character(stopAA[[i]])

  # DNA level statistics ####
  dataTable[[cols[["dnaIdentity"]]]][rw] <- calcIdentity(subalign)[i]
  dnaLength <- nchar(turnWoGaps(subalign[[i]]))
  dataTable[[cols[["dnaLength"]]]][rw] <- dnaLength
  # Gaps = aligned length minus length once gaps are removed.
  dataTable[[cols[["gapCount"]]]][rw] <- length(subalign[[i]]) - dnaLength

  # Amino acid level statistics ####
  orfLength <- nchar(orf[[1]])
  referenceLength <- df$`Length of Amino Acid Sequence ORF`[rw]
  dataTable[[cols[["aaLengthGaps"]]]][rw] <- nchar(AA[[i]])
  dataTable[[cols[["aaLengthNoGaps"]]]][rw] <- orfLength
  dataTable[[cols[["lengthRatio"]]]][rw] <- orfLength / referenceLength
  dataTable[[cols[["aaIdentityOverlap"]]]][rw] <- calcIdentity(overlap)[i]
  dataTable[[cols[["aaIdenticalOverlap"]]]][rw] <- calcIdentity(overlap, percent = FALSE)[i]
  dataTable[[cols[["aaIdentityFrame"]]]][rw] <- calcIdentity(aa)[i]
  dataTable[[cols[["sameLength"]]]][rw] <- referenceLength == orfLength

  dataTable
}
#' Finding ORFs using aa sequence
#'
#' Extracts every non-overlapping stretch that begins with \code{M} and runs
#' up to and including the next \code{X} (presumably \code{X} marks a stop in
#' the translated sequence; confirm against the translation step).
#' @param aa_seq amino acid sequence to search; only the matches for its first
#'   element are returned
#' @return character vector with the matched stretches
findORF <- function(aa_seq) {
  orfPattern <- 'M[^X]*X'
  matchesPerSequence <- str_extract_all(aa_seq, orfPattern)
  matchesPerSequence[[1]]
}
# NOTE: the two lines below are leftover web-page boilerplate, not R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.