Nothing
# To extract the next line from the file
extractLine <- function(in_file) {
readLines(in_file, n=1)
}
easySplit <- function(line, splitter) {
strsplit(line, splitter)[[1]]
}
#' Input data from CPP output into a list of data frames
#' @param filename The filename of the output generated by the simulation
#' @return A list containing
#' \describe{
#' \item{params}{the parameters used to run the simulation}
#' \item{sequences}{a data frame of the sequences over time and their similarity
#' to the initial sequence
#' \itemize{
#' \item step - timestep in the simulation
#' \item realTime - time since the start of the simulation (in millions of years)
#' \item sequenceId - the unique ID of the sequence (to track it over time);
#' initial sequence has sequenceId 0 to (numInitialCopies-1)
#' \item parentMain - the unique ID of the sequence this burst from;
#' (-1 if nothing)
#' \item parentOther - the unique ID of the sequence its parent recombined with;
#' (-1 if nothing)
#' \item distanceToInitial - the distance to the initial sequence
#' \item isActive - whether or not the sequence is capable of transposition
#' }}
#' \item{pairwise}{a data frame of the pairwise similarity of sequences over time
#' \itemize{
#' \item step - timestep in the simulation
#' \item realTime - time since the start of the simulation (in millions of years)
#' \item sequenceId1 - an ID of a sequence present at the time
#' \item sequenceId2 - an ID of a different sequence present at the time; not all pairs
#' are given - that is, for sequences a and b, either (a, b) or (b, a)
#' is present as a row but not both
#' \item distancePairwise - the distance between the two sequences
#' }}
#' \item{familyRepresentatives}{a data frame of the family representatives
#' for the retrotransposons over time
#' \itemize{
#' \item step - timestep in the simulation
#' \item realTime - time since the start of the simulation (in millions of years)
#' \item familyId - the unique ID of the family representative (to track it over time)
#' \item creationTime - time this family representative was created (in millions of years)
#' \item sequenceId - a sequence belonging to that family (sequences belonging to a
#' family are listed as separate rows, and all sequences belonging
#' to that family are listed)
#' }}
#' \item{familyPairwise}{a data frame of the pairwise similarity of the
#' family representatives for the retrotransposons over time
#' \itemize{
#' \item step - timestep in the simulation
#' \item realTime - time since the start of the simulation (in millions of years)
#' \item familyId1 - an ID of a family representative present at the time
#' \item familyId2 - an ID of a different family representative present at the time;
#' not all pairs are given - that is, for sequences a and b,
#' either (a, b) or (b, a) is present as a row but not both
#' \item distancePairwise - the distance between the two family representatives
#' }}
#' }
#' @examples
#' \dontrun{
#' data <- parseSimulationOutput('simulationOutput.out')
#' }
#' @export
parseSimulationOutput <- function(filename)
{
# TODO: Return a good error message if the file cannot be parsed
data <- list()
data$params <- list()
data$sequences <- data.frame()
data$pairwise <- data.frame()
data$familyRepresentatives <- data.frame()
data$familyPairwise <- data.frame()
con = file(filename, "r")
# basic variables
t <- -1
realTime <- -1
tags <- NULL
while (TRUE)
{
nextLine <- extractLine(con)
if (length(nextLine) == 0) {
break # EOF
}
if (nextLine == "Param<")
{
while(TRUE) {
nextLine <- extractLine(con)
if(nextLine == ">Param") { break }
splitLine <- easySplit(nextLine, ":")
param <- splitLine[1]
value <- splitLine[2]
if (param %in% c('SequenceParams_initialSequence',
'MutationParams_model',
'OutputParams_outputFileName'
)) {
data$params[[param]] <- value
}
else if (param %in% c('SeedParams_toSeed')) {
data$params[[param]] <- as.logical(value)
}
else {
data$params[[param]] <- as.numeric(value)
}
}
}
else if (nextLine == "Init<")
{
timestep <- as.numeric(trimws(extractLine(con), "left", whitespace = "@"))
numSeq <- as.numeric(trimws(extractLine(con), "left", whitespace = "!"))
seqIds <- rep(NA_integer_, numSeq)
parMains <- rep(NA_integer_, numSeq)
parOthers <- rep(NA_integer_, numSeq)
initDists <- rep(NA_integer_, numSeq)
actives <- rep(NA_integer_, numSeq)
i <- 1
while (i <= numSeq) {
nextLine <- extractLine(con)
splitLine <- easySplit(nextLine, ":")
seqIds[i] <- splitLine[1]
parMains[i] <- splitLine[2]
parOthers[i] <- splitLine[3]
initDists[i] <- splitLine[4]
actives[i] <- splitLine[5]
i <- i+1
}
steps <- rep(timestep, numSeq)
realTimes = steps * data$params$SimulationParams_timePerStep
data$sequences <- rbind(data$sequences,
data.frame(step = steps, realTime = realTimes,
sequenceId = as.numeric(seqIds),
parentMain = as.numeric(parMains),
parentOther = as.numeric(parOthers),
distanceToInitial = as.numeric(initDists),
isActive = as.logical(actives)
)
)
extractLine(con)
}
else if (nextLine == "Pair<")
{
timestep <- as.numeric(trimws(extractLine(con), "left", whitespace = "@"))
numSeq <- as.numeric(trimws(extractLine(con), "left", whitespace = "!"))
seqId1s <- rep(NA_integer_, numSeq*(numSeq-1)/2)
seqId2s <- rep(NA_integer_, numSeq*(numSeq-1)/2)
pairDists <- rep(NA_integer_, numSeq*(numSeq-1)/2)
i <- 1
while(TRUE) {
nextLine <- extractLine(con)
if(nextLine == ">Pair") {
if (i > 1) {
steps <- rep(timestep, i-1)
realTimes = steps * data$params$SimulationParams_timePerStep
data$pairwise <- rbind(data$pairwise,
data.frame(step = steps, realTime = realTimes,
sequenceId1 = as.numeric(seqId1s[1:(i-1)]),
sequenceId2 = as.numeric(seqId2s[1:(i-1)]),
distancePairwise = as.numeric(pairDists[1:(i-1)]))
)
}
break
}
splitLine <- easySplit(nextLine, ":")
seqId1s[i] <- splitLine[1]
seqId2s[i] <- splitLine[2]
pairDists[i] <- splitLine[3]
i <- i+1
}
}
else if (nextLine == "FamTags<")
{
timestep <- as.numeric(trimws(extractLine(con), "left", whitespace = "@"))
numFam <- as.numeric(trimws(extractLine(con), "left", whitespace = "!"))
numSeq <- as.numeric(trimws(extractLine(con), "left", whitespace = "!"))
famIds <- rep(NA_integer_, numSeq*numFam)
seqId2s <- rep(NA_integer_, numSeq*numFam)
pairDists <- rep(NA_integer_, numSeq*numFam)
i <- 1
while (i <= numFam) {
nextLine <- extractLine(con)
splitLine <- easySplit(nextLine, ":")
famId <- splitLine[1]
creationId <- splitLine[2]
seqIds <- easySplit(splitLine[3], ",")
steps <- rep(timestep, length(seqIds))
data$familyRepresentatives <- rbind(data$familyRepresentatives,
data.frame(step = steps, realTime = steps*data$params$SimulationParams_timePerStep,
familyId = rep(as.numeric(famId), length(seqIds)),
creationTime = rep(as.numeric(creationId), length(seqIds)),
sequenceId = as.numeric(seqIds)
)
)
i <- i+1
}
}
else if (nextLine == "FamDist<")
{
timestep <- as.numeric(trimws(extractLine(con), "left", whitespace = "@"))
numFam <- as.numeric(trimws(extractLine(con), "left", whitespace = "!"))
famId1s <- rep(NA_integer_, numFam*(numFam-1)/2)
famId2s <- rep(NA_integer_, numFam*(numFam-1)/2)
pairDists <- rep(NA_integer_, numFam*(numFam-1)/2)
i <- 1
while(TRUE) {
nextLine <- extractLine(con)
if(nextLine == ">FamDist") {
if (i > 1) {
steps <- rep(timestep, i-1)
realTimes = steps * data$params$SimulationParams_timePerStep
data$familyPairwise <- rbind(data$familyPairwise,
data.frame(step = steps, realTime = realTimes,
familyId1 = as.numeric(famId1s[1:(i-1)]),
familyId2 = as.numeric(famId2s[1:(i-1)]),
distancePairwise = as.numeric(pairDists[1:(i-1)]))
)
}
break
}
splitLine <- easySplit(nextLine, ":")
famId1s[i] <- splitLine[1]
famId2s[i] <- splitLine[2]
pairDists[i] <- splitLine[3]
i <- i+1
}
}
}
close(con)
return(data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.