no.extension <- function(filename) {
if (substr(filename, nchar(filename), nchar(filename))==".") {
return(substr(filename, 1, nchar(filename)-1))
} else {
no.extension(substr(filename, 1, nchar(filename)-1))
}
}
extract.ages <- function(file = NULL, replicates = 1, cutoff = NULL, random = TRUE){
if (is.null(file))
stop("you must enter a filename or a character string\n")
rnd <- random
q <- cutoff
dat1 <- read.table(file, header=T, stringsAsFactors=F, row.names=NULL, sep="\t", strip.white=T)
fname <- no.extension(basename(file))
outfile <- paste(dirname(file), "/", fname, "_PyRate.py", sep="")
dat1[,1] <- gsub("[[:blank:]]{1,}","_", dat1[,1])
if (replicates > 1){
rnd <- TRUE
}
if (any(is.na(dat1[,1:4]))){
print(c(which(is.na(dat1[,1])),which(is.na(dat1[,2])),which(is.na(dat1[,3])),which(is.na(dat1[,4])) ) )
stop("the input file contains missing data in species names, status or ages)\n")
}
if (!is.null(q)){
dat <- dat1[!(dat1[,4] - dat1[,3] >= q),]
cat("\n\nExcluded ", 100-round(100*dim(dat)[1]/dim(dat1)[1]), "% occurrences")
hist(dat1[,4] - dat1[,3])
} else {
dat <- dat1
}
if (length(dat) == 5){
colnames(dat) <- c("Species", "Status", "min_age", "max_age", "trait")
} else {
colnames(dat) <- c("Species", "Status", "min_age", "max_age")
}
dat$new_age <- "NA"
splist <- unique(dat[,c(1,2)])[order(unique(dat[,c(1,2)][,1])),]
if (any(is.element(splist$Species[splist$Status == "extant"], splist$Species[splist$Status == "extinct"]))){
print(intersect(splist$Species[splist$Status == "extant"], splist$Species[splist$Status == "extinct"]))
stop("at least one species is listed as both extinct and extant\n")
}
cat("#!/usr/bin/env python", "from numpy import * ", "", file=outfile, sep="\n")
for (j in 1:replicates){
times <- list()
cat ("\nreplicate", j)
dat[dat$min_age == 0,3] <- 0.001
#if (any(dat[,4] < dat[,3])){
# cat("\nWarning: the min age is older than the max age for at least one record\n")
# cat ("\nlines:",1+as.numeric(which(dat[,4] < dat[,3])),sep=" ")
#}
if (isTRUE(rnd)){
dat$new_age <- round(runif(length(dat[,1]), min=apply(dat[,3:4],FUN=min,1), max=apply(dat[,3:4],FUN=max,1)), digits=6)
} else {
for (i in 1:length(dat[,1])){
dat$new_age[i] <- mean(c(dat[i,3], dat[i,4]))
}
}
dat2 <- subset(dat, select=c("Species","new_age"))
taxa <- sort(unique(dat2$Species))
for (n in 1:length(taxa)){
times[[n]] <- dat2$new_age[dat2$Species == taxa[n]]
if (toupper(splist$Status[splist$Species == taxa[n]]) == toupper("extant")){
times[[n]] <- append(times[[n]], "0", after=length(times[[n]]))
}
}
dat3 <- matrix(data=NA, nrow=length(times), ncol=max(sapply(times, length)))
rownames(dat3) <- taxa
for (p in 1:length(times)){
dat3[p,1:length(times[[p]])] <- times[[p]]
}
cat(noquote(sprintf("\ndata_%s=[", j)), file=outfile, append=TRUE)
for (n in 1:(length(taxa)-1)){
rec <- paste(dat3[n,!is.na(dat3[n,])], collapse=",")
cat(noquote(sprintf("array([%s]),", rec)), file=outfile, append=TRUE, sep="\n")
}
n <- n+1
rec <- paste(dat3[n,!is.na(dat3[n,])], collapse=",")
cat(noquote(sprintf("array([%s])", rec)), file=outfile, append=TRUE, sep="\n")
cat("]", "", file=outfile, append=TRUE, sep="\n")
}
data_sets <- ""
names <- ""
if (replicates > 1){
for (j in 1:(replicates-1)) {
data_sets <- paste(data_sets, noquote(sprintf("data_%s,", j)))
names <- paste(names, noquote(sprintf(" '%s_%s',", fname,j)))
}
data_sets <- paste(data_sets, noquote(sprintf("data_%s", j+1)))
names <- paste(names, noquote(sprintf(" '%s_%s',", fname,j+1)))
} else {
data_sets <- "data_1"
names <- noquote(sprintf(" '%s_1'", fname))
}
cat(noquote(sprintf("d=[%s]", data_sets)), noquote(sprintf("names=[%s]", names)), "def get_data(i): return d[i]", "def get_out_name(i): return names[i]", file=outfile, append=TRUE, sep="\n")
tax_names <- paste(taxa, collapse="','")
cat(noquote(sprintf("taxa_names=['%s']", tax_names)), "def get_taxa_names(): return taxa_names", file=outfile, append=TRUE, sep="\n")
if ("trait" %in% colnames(dat)){
datBM <- dat[,1]
splist$Trait <- NA
for (n in 1:length(splist[,1])){
splist$Trait[n] <- mean(dat$trait[datBM == splist[n,1]], na.rm=T)
}
s1 <- "\ntrait1=array(["
BM <- gsub("NaN|NA", "nan", toString(splist$Trait))
s2 <- "])\ntraits=[trait1]\ndef get_continuous(i): return traits[i]"
STR <- paste(s1,BM,s2)
cat(STR, file=outfile, append=TRUE, sep="\n")
}
splistout <- paste(dirname(file), "/", fname, "_TaxonList.txt", sep="")
lookup <- as.data.frame(taxa)
lookup$status <- "extinct"
write.table(splist, file=splistout, sep="\t", row.names=F, quote=F)
cat("\n\nPyRate input file was saved in: ", sprintf("%s", outfile), "\n\n")
}
fit.prior <- function(file = NULL, lineage = "root_age"){
require(fitdistrplus)
if (is.null(file)){
stop("You must enter a valid filename.\n")
}
dat <- read.table(file, header=T, stringsAsFactors=F, row.names=NULL, sep="\t")
fname <- no.extension(basename(file))
outfile <- paste(dirname(file), "/", lineage, "_Prior.txt", sep="")
lineage2 <- paste(lineage,"_TS", sep="")
if (!is.element(lineage2, colnames(dat))){
stop("Lineage not found, please check your input.\n")
}
time <- dat[,which(names(dat) == lineage2)]
time2 <- time-(min(time)-0.01)
gamm <- fitdist(time2, distr="gamma", method = "mle")$estimate
cat("Lineage: ", lineage, "; Shape: ", gamm[1], "; Scale: ", 1/gamm[2], "; Offset: ", min(time), sep="", file=outfile, append=FALSE)
}
extract.ages.pbdb <- function(file = NULL,sep=",", extant_species = c(), replicates = 1, cutoff = NULL, random = TRUE){
print("This function is currently being tested - caution with the results!")
tbl = read.table(file=file,h=T,sep=sep,stringsAsFactors =F)
new_tbl = NULL # ADD EXTANT SPECIES
for (i in 1:dim(tbl)[1]){
if (tbl$accepted_name[i] %in% extant_species){
status="extant"
}else{status="extinct"}
species_name = gsub(" ", "_", tbl$accepted_name[i])
new_tbl = rbind(new_tbl,c(species_name,status,tbl$min_ma[i],tbl$max_ma[i]))
}
colnames(new_tbl) = c("Species","Status","min_age","max_age")
output_file = file.path(dirname(file),strsplit(basename(file), "\\.")[[1]][1])
output_file = paste(output_file,".txt",sep="")
write.table(file=output_file,new_tbl,quote=F,row.names = F,sep="\t")
extract.ages(file=output_file,replicates = replicates, cutoff = cutoff, random = random)
}
extract.ages.tbl <- function(file = NULL,sep="\t", extant_species = c(), replicates = 1, cutoff = NULL, random = TRUE){
tbl = read.table(file=file,h=T,sep=sep,stringsAsFactors =F)
new_tbl = NULL # ADD EXTANT SPECIES
for (i in 1:dim(tbl)[1]){
if (tbl[i,1] %in% extant_species){
status="extant"
}else{status="extinct"}
species_name = gsub(" ", "_", tbl[i,1])
new_tbl = rbind(new_tbl,c(species_name,status,tbl[i,2:dim(tbl)[2]]))
}
if (dim(new_tbl)[2]==4){
colnames(new_tbl) = c("Species","Status","min_age","max_age")
}else{
colnames(new_tbl) = c("Species","Status","min_age","max_age","trait")
}
output_file = file.path(dirname(file),strsplit(basename(file), "\\.")[[1]][1])
output_file = paste(output_file,".txt",sep="")
write.table(file=output_file,new_tbl,quote=F,row.names = F,sep="\t")
extract.ages(file=output_file,replicates = replicates, cutoff = cutoff, random = random)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.