suppressPackageStartupMessages(library(arkas)) suppressPackageStartupMessages(library(TxDbLite)) samples<-c("n1","n2","n4","s1","s2","s4") pathBase<-system.file("extdata",package="arkasData") merged <- mergeKallisto(samples, outputPath=pathBase) assays(merged) kallistoVersion(merged) transcriptomes(merged) tail(tpm(merged))
Arkas can calculate TPMs using the effective length, or an accessor tpm(). Here we only show a TPM calculation for one sample named 'n1', but it is a uniform process and can easily be applied to many samples. For this demo we do see two outliers which an absolute difference of greater than 0.3 which is by defintion a true calculation error. By looking at the raw values derived from calcTpm(), tpm() and the raw kallisto derived .tsv number, each of the values are very close and it appears to be a rounding issue. The calculation was numerically precise for 213780/213782 which has high accuracy overall.
n1Tsv<-read.csv(paste0(pathBase,"/n1/abundance.tsv"),header=TRUE,sep="\t") head(n1Tsv) xx2<-n1Tsv$est_counts leng<-n1Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"n1" colnames(leng)<-"n1" test1<-calcTpm(xx2,leng) err<-abs(test1-n1Tsv$tpm) err2<-abs(tpm(merged)[,1]-n1Tsv$tpm) plot(err) title("calcTpm N1 absolute Errors") plot(err2) title("tpm() N1 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-n1Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-n1Tsv$target_id n1Tsv[n1Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),1] #arkas::tpm()
n2Tsv<-read.csv(paste0(pathBase,"/n2/abundance.tsv"),header=TRUE,sep="\t") head(n2Tsv) xx2<-n2Tsv$est_counts leng<-n2Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"n2" colnames(leng)<-"n2" test1<-calcTpm(xx2,leng) err<-abs(test1-n2Tsv$tpm) err2<-abs(tpm(merged)[,2]-n2Tsv$tpm) plot(err) title("calcTpm N2 absolute Errors") plot(err2) title("tpm() N2 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-n2Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-n2Tsv$target_id n2Tsv[n2Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),2] #arkas::tpm()
n4Tsv<-read.csv(paste0(pathBase,"/n4/abundance.tsv"),header=TRUE,sep="\t") head(n4Tsv) xx2<-n4Tsv$est_counts leng<-n4Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"n4" colnames(leng)<-"n4" test1<-calcTpm(xx2,leng) err<-abs(test1-n4Tsv$tpm) err2<-abs(tpm(merged)[,3]-n4Tsv$tpm) plot(err) title("calcTpm N4 absolute Errors") plot(err2) title("tpm() N4 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-n4Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-n4Tsv$target_id n4Tsv[n4Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),3] #arkas::tpm()
s1Tsv<-read.csv(paste0(pathBase,"/s1/abundance.tsv"),header=TRUE,sep="\t") head(s1Tsv) xx2<-s1Tsv$est_counts leng<-s1Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"s1" colnames(leng)<-"s1" test1<-calcTpm(xx2,leng) err<-abs(test1-s1Tsv$tpm) err2<-abs(tpm(merged)[,4]-s1Tsv$tpm) plot(err) title("calcTpm S1 absolute Errors") plot(err2) title("tpm() S1 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-s1Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-s1Tsv$target_id s1Tsv[s1Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),4] #arkas::tpm()
s2Tsv<-read.csv(paste0(pathBase,"/s2/abundance.tsv"),header=TRUE,sep="\t") head(s2Tsv) xx2<-s2Tsv$est_counts leng<-s2Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"s2" colnames(leng)<-"s2" test1<-calcTpm(xx2,leng) err<-abs(test1-s2Tsv$tpm) err2<-abs(tpm(merged)[,5]-n2Tsv$tpm) plot(err) title("calcTpm S2 absolute Errors") plot(err2) title("tpm() S2 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-s2Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-s2Tsv$target_id s2Tsv[s2Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),5] #arkas::tpm()
s4Tsv<-read.csv(paste0(pathBase,"/s4/abundance.tsv"),header=TRUE,sep="\t") head(s4Tsv) xx2<-s4Tsv$est_counts leng<-s4Tsv$eff_length xx2<-as.matrix(xx2) leng<-as.matrix(leng) colnames(xx2)<-"s4" colnames(leng)<-"s4" test1<-calcTpm(xx2,leng) err<-abs(test1-s4Tsv$tpm) err2<-abs(tpm(merged)[,6]-s4Tsv$tpm) plot(err) title("calcTpm S4 absolute Errors") plot(err2) title("tpm() S4 absolute Errors")
Here we look at the calculated TPM errors greater than 0.3
rownames(err)<-s4Tsv$target_id highErrs<-as.matrix(err[which(err>0.3),]) rownames(test1)<-s4Tsv$target_id s4Tsv[s4Tsv$target_id%in%rownames(highErrs),] #kallisto dervied test1[rownames(test1)%in%rownames(highErrs),] #arkas::calcTpm() tpp<-tpm(merged) tpp[rownames(tpp)%in%rownames(highErrs),6] #arkas::tpm()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.