working_dir <- '/home/phillipl/projects/HaplotypeTracker/code/HaplotypeTracker' setwd(working_dir) library(HaplotypeTracker) input_aa_file <- '/home/phillipl/projects/toolmania/data/CAP177prot/all_timesv3_prot.fasta' input_dna_file <- '/home/phillipl/projects/toolmania/data/CAP177/all_timesv3.fasta' output_dir <- '/home/phillipl/projects/HaplotypeTracker/results' xth_most_freq_seq <- 1 starting_time <- "4250V3" dataset_order <- c("2000V3", "2020V3", "2030V3", "2040V3", "2050V3", "2060V3", "3080V3", "3090V3", "3100V3", "3110V3", "3120V3", "3130V3", "3140V3", "3150V3", "3160V3", "4170V3", "4180V3", "4190V3", "4200V3", "4210V3", "4220V3", "4230V3", "4240V3", "4250V3") opts_chunk$set(echo=FALSE)
library(knitr) library(devtools) working_dir <- '/home/phillipl/projects/HaplotypeTracker/code/HaplotypeTracker' setwd(working_dir) document() build() install() load_all() knit2html(file.path(working_dir, 'inst/ideas.Rmd')) browseURL(paste('file://', file.path(getwd(),'ideas.html'), sep=''))
seq_dna_data <- readDNAStringSet(input_dna_file) seq_aa_data <- readAAStringSet(input_aa_file) seq_data <- seq_aa_data print(seq_data) print(get_unique_points_of("time", seq_data, sep = "_", indx = 2)) dput(get_unique_points_of("time", seq_data, sep = "_", indx = 2))
filtered_data <- get_data_of("timepoint", seq_data, starting_time) tab <- table(filtered_data) hap <- sort(tab, decreasing = T)[xth_most_freq_seq] prev_seq <- AAString(names(hap)) best_score_possible <- score(pairwiseAlignment(prev_seq, prev_seq, substitutionMatrix = "BLOSUM50")) haplotypes <- data.frame(selected_seq = names(hap), timepoint = starting_time, occurances = as.numeric(hap), timepoint_size = length(filtered_data), score = as.numeric(NA), best_score_possible = best_score_possible, ss_with_first_seq = best_score_possible) print(table(tab)) print(prev_seq) print(best_score_possible)
x <- match(starting_time, dataset_order) if (x > 1 & x < length(dataset_order)){ direction <- 'both' } else if (x == 1) { direction <- 'forward' } else if (x == length(dataset_order)){ direction <- 'backward' } else { stop('something went wrong with the direction detection') } print(direction)
timepoints <- rev(dataset_order) timepoints <- setdiff(timepoints, starting_time) for (timepoint in timepoints){ filtered_data <- get_data_of("timepoint", seq_data, timepoint) alignments <- pairwiseAlignment(gsub('-|~', '', unique(filtered_data)), gsub('-|~', '', prev_seq), substitutionMatrix = "BLOSUM50") # for (i in seq_along(unique(filtered_data))){ # print(i) # print(pairwiseAlignment(gsub('-|~', '', unique(filtered_data)[i]), # gsub('-|~', '', prev_seq), # substitutionMatrix = "BLOSUM50")) # } scores <- score(alignments) indx <- which(scores == max(scores)) tab <- table(filtered_data) hap <- tab[match(as.character(pattern(alignments[indx])), attr(tab, 'dimnames')$filtered_data)] hap <- hap[hap == max(hap)] hap <- hap[1] prev_seq <- AAString(names(hap)) best_score_possible <- score(pairwiseAlignment(prev_seq, prev_seq, substitutionMatrix = "BLOSUM50")) ss_with_first_seq <- score(pairwiseAlignment(prev_seq, AAString(haplotypes[1,1]), substitutionMatrix = "BLOSUM50")) haplotypes_row <- data.frame(selected_seq = names(hap), timepoint = timepoint, occurances = as.numeric(hap), timepoint_size = length(filtered_data), score = max(scores[indx]), best_score_possible = best_score_possible, ss_with_first_seq = ss_with_first_seq) haplotypes <- rbind(haplotypes, haplotypes_row) }
kable(haplotypes[,2:ncol(haplotypes)])
print(haplotypes[,1])
p <- ggplot(haplotypes, aes(x = timepoint, y = occurances)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle = 90, hjust = 1)) print(p)
p <- ggplot(haplotypes, aes(x = timepoint, y = occurances/timepoint_size)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle = 90, hjust = 1)) print(p)
seqs <- AAStringSet(haplotypes[,1]) names(seqs) <- haplotypes[,2] distmat <- stringDist(seqs) plot(bionj(distmat))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.