##' Create 'aprof' objects for usage with 'aprof' functions
##'
##' Creates an "aprof" object from the R-profiler's output and a source file.
##' The objects created through "aprof" can be used by the standard functions
##' plot, summary and print (more specifically:
##' \code{plot.aprof}, \code{summary.aprof} and \code{print.arof}).
##' See the example below for more details.
##'
##' Using aprof with knitr and within .Rmd or .Rnw documents
##' is not yet supported by the R profiler. Note that setting the
##' chuck option: engine="Rscript", disables line-profiling.
##' Line profiling only works in a interactive session (Oct 2015).
##' In these cases users are advised to use the standard
##' Rprof functions or "profr" (while setting engine="Rscript") and
##' not to rely on line-profiling based packages (for the time
##' being).
##' @title Create an 'aprof' object for usage in the package 'aprof'
##' @param src The name of the source code file (and path if not in the working
##' directory). The source code file is expected to be a
##' a plain text file (e.g. txt, .R), containing the code of the
##' previously profiled program. If left empty, some "aprof" functions
##' (e.g. \code{readLineDensity}) will attempt to extract this information from
##' the call stack but this is not recommended (as the success of
##' file name detection operations vary). Note that
##' functions that require a defined source file will fail if
##' the source file is not defined or detected in the call stack.
##'
##'
##' @param output The file name (and path if not in the working
##' directory) of a previously created profiling exercise.
##' @author Marco D. Visser
##' @examples
##' \dontrun{
##' ## create function to profile
##' foo <- function(N){
##' preallocate<-numeric(N)
##' grow<-NULL
##' for(i in 1:N){
##' preallocate[i]<-N/(i+1)
##' grow<-c(grow,N/(i+1))
##' }
##' }
##'
##' ## save function to a source file and reload
##' dump("foo",file="foo.R")
##' source("foo.R")
##'
##' ## create file to save profiler output
##' tmp<-tempfile()
##'
##' ## Profile the function
##' Rprof(tmp,line.profiling=TRUE)
##' foo(1e4)
##' Rprof(append=FALSE)
##'
##' ## Create a aprof object
##' fooaprof<-aprof("foo.R",tmp)
##' ## display basic information, summarize and plot the object
##' fooaprof
##' summary(fooaprof)
##' plot(fooaprof)
##' profileplot(fooaprof)
##'
##' ## To continue with memory profiling:
##' ## enable memory.profiling=TRUE
##' Rprof(tmp,line.profiling=TRUE,memory.profiling=TRUE)
##' foo(1e4)
##' Rprof(append=FALSE)
##' ## Create a aprof object
##' fooaprof<-aprof("foo.R",tmp)
##' ## display basic information, and plot memory usage
##' fooaprof
##'
##' plot(fooaprof)
##'
##' }
##' @seealso \code{\link{plot.aprof}}, \code{\link{summary.aprof}},
##' \code{\link{print.aprof}}, \code{\link{Rprof}} and
##' \code{\link{summaryRprof}}.
##' @return An aprof object
##' @concept Line profiling
##' @export
aprof <- function(src=NULL,output=NULL){
if(is.null(src)){
warning("src is empty, no source code file defined")} else {
if(!file.exists(src)) {stop(paste("The specified source ",
src, " does not appear to exist"))
}
if(is.na(file.info(src)$size)|file.info(src)$size<1){
stop("specified source file appears to be empty")}
}
if(!is.null(output)){
CallsInt <- readOutput(output)
if(is.null(CallsInt$calls)|length(CallsInt$calls)==0){
stop(paste("Rprof outputs appears to be empty,",
"were enough samples made by the profiler?"))}
} else { stop("No profiling output files defined")}
if(!is.null(CallsInt$mem)) {
aprofobject<-list(sourcefile=src,
calls=CallsInt$calls,
mem=CallsInt$mem,
interval=CallsInt$interval)
class(aprofobject) <- c("aprof","mem.aprof","list")
} else {
aprofobject<-list(sourcefile=src,calls=CallsInt$calls,
interval=CallsInt$interval)
class(aprofobject) <- c("aprof","list")
}
return(aprofobject)
}
# readOutput
#
# Reads and organises output files created by the R
# profiler. This is a lower-level function, used to create an "aprof class"
# in the function \code{aprof}.
#
# @param outputfilename The file name (and path if not in the working
# directory) of a previously created profiling exercise.
#
# @author Marco D. Visser
#
readOutput<-function(outputfilename="Rprof.out"){
#Read and prepare output file
RprofSamples<-readLines(outputfilename)
if(length(grep("line profiling",RprofSamples[1]))==0){
stop(paste("Line profiling is required.",
"\nPlease run the profiler with line profiling enabled"))}
Mem <- grepl("memory profiling",RprofSamples[1])
if(Mem){
tosplit <- grepl("#", RprofSamples[-1])
splPos<-regexpr(pattern = "^:[0-9]+:[0-9]+:[0-9]+:[0-9]+:",
text =RprofSamples[-1][tosplit])
meminfo <- !splPos==(-1)
cutlength <- attr(splPos,"match.length")
mem <- substr(RprofSamples[-1][tosplit][meminfo],1,cutlength[meminfo])
mem <- t(sapply(strsplit(mem,":"),function(X) as.numeric(X[-1])))
mem <- as.data.frame(mem)
colnames(mem) <- c("sm_v_heap","lrg_v_heap","mem_in_node")
mem$mb<-rowSums(mem)/1024^2
splPosReg <- regexpr(pattern = "\\\"|:.#|#F",
text =RprofSamples[-1][tosplit])
regular <- substring(RprofSamples[-1][tosplit],splPosReg)
regular <- gsub(":","",regular)
RprofSamples <- c(RprofSamples[1],regular)
mem$calllines <- which(meminfo)
}
splitCalls<- sapply(RprofSamples[-1],
function(X) strsplit(X, split = " "),USE.NAMES=FALSE)
##seperated function calls
calls<-sapply(splitCalls, function(x) rev(gsub("\"", "", x)))
##sample.interval (sample rate/micro second)
Samp.Int<-as.numeric(strsplit(RprofSamples[1],"=")[[1]][2])
##return function calls and interval
if(Mem){
return(list(calls=calls,mem=mem,interval=Samp.Int*1e-6))
} else {
return(list(calls=calls,interval=Samp.Int*1e-6))
}
}
#' readLineDensity
#'
#' Reads and calculates the line density (in execution time or memory)
#' of an aprof object returned by the \code{aprof} function.
#' If a sourcefile was not specified in the aprof object, then the first file
#' within the profiling information is assumed to be the source.
#'
#' @param aprofobject An object returned by \code{aprof}, which
#' contains the stack calls sampled by the R profiler.
#' @param Memprof Logical. Should the function return information
#' specific to memory profiling with memory use per line in MB?
#' Otherwise, the default is to return line call density and execution time
#' per line.
#' @author Marco D. Visser
#' @export
readLineDensity<-function(aprofobject=NULL,Memprof=FALSE){
if(!"aprof"%in%class(aprofobject)){ stop("no aprof object found,
check function inputs")}
calls <- aprofobject$calls
interval <- aprofobject$interval
TargetFile <- aprofobject$sourcefile
## find all files in the call stack
idfiles<-sapply(calls,function(X) length(grep("#File", X))>=1)
## extract files
CallFiles <- sapply(calls[idfiles],function(X) X[1])
if(is.null(TargetFile))
{
FileNumber<-"1:"
warning(paste("sourcefile is null", " assuming first file in
call stack is the source: ", CallFiles[1],sep=""))
if(!exists(CallFiles[1])){stop("source file was not defined and
does not seem to exist in the working directory.")}
} else{
unlistedCalls <- unlist(calls)
## add path or only stick to basename?
if(sum(unlistedCalls==TargetFile)==0){
if(sum(unlistedCalls==basename(TargetFile))>0){
TargetFile <- basename(TargetFile) } else {
warning(paste("specified source
file ", TargetFile, " is not in the list of files in the
profiler output: \n ", CallFiles,sep=""))
}
}
FileNumber<-unlistedCalls[which(unlistedCalls==TargetFile)+1]
FileCheck<-unlistedCalls[which(unlistedCalls==TargetFile)]
## Confirm that call stack corresponds to user supplied sourcefile
if(length(FileCheck)==0){
warning(paste("Some aprof functions may fail -->",
" user supplied source file",
TargetFile,
" does not seem to correspond to any",
" file in the profiler output.\n",
" Possible causes: \n" ,
"1) Source file was not profiled?\n",
"2) Spelling?\n",sep=""))
}
}
FileNumber <- substr(FileNumber,1,1)
## remove all file references
cleancalls<-sapply(calls[!idfiles],
function(x) gsub("#File", NA, x))
LineCalls<- lapply(cleancalls, function(X)
unique(X[grep(paste(FileNumber,"#",sep=''),X)],
USE.NAMES=FALSE))
## in case of multiple line calls;
if(any(sapply(LineCalls,length)>1)){
LineCalls <- sapply(LineCalls,function(X) X[1])
warning("Some line calls stripped - BUGCODE: 02022016")
}
LineCalls <- unlist(LineCalls)
Pathways<-unique(sapply(LineCalls, paste,collapse="-"))
## limit only those containing information
Pathways<-Pathways[grep("#",Pathways)]
filehash <- paste(FileNumber,"#",sep="")
LineDensity<-table(unlist(sapply(LineCalls,unique)))
names(LineDensity)<-gsub(filehash,"", names(LineDensity))
Line.Numbers<-as.numeric(names(LineDensity))
Call.Density<-as.numeric(LineDensity)
Time.Density<-Call.Density*interval
if(Memprof) {
MemLines <- as.integer(gsub(filehash,"", LineCalls))
TotalMem <- tapply(c(0,diff(aprofobject$mem$mb)),
MemLines,function(X) sum(abs(X)))
Finallist <-list(Line.Numbers=as.numeric(names(LineDensity)),
Call.Density=as.numeric(LineDensity),
Time.Density=Call.Density*interval,
Total.Calls=sum(as.numeric(LineDensity))+1,
Total.Time=sum(Call.Density*interval+interval),
Files=CallFiles,Total.Mem=TotalMem)
} else {
Finallist <-list(Line.Numbers=as.numeric(names(LineDensity)),
Call.Density=as.numeric(LineDensity),
Time.Density=Call.Density*interval,
Total.Calls=sum(as.numeric(LineDensity))+1,
Total.Time=sum(Call.Density*interval+interval),
Files=CallFiles)
}
return(Finallist)
}
#' Generic print method for aprof objects
#'
#' Function that makes a pretty table, and returns
#' some basic information.
#' @param x An aprof object returned by the
#' function \code{aprof}.
#' @param \dots Additional printing arguments. Unused.
#' @rdname print
#' @method print aprof
#' @export
print.aprof <- function(x,...){
aprofobject<-x
if(!is.aprof(aprofobject)){
stop("Input does not appear to be of the class \"aprof\"")}
if(!is.null(aprofobject$sourcefile)){
cat(paste0("\nSource file:\n",aprofobject$sourcefile," (",
length(readLines(aprofobject$sourcefile))
," lines).\n"))
}
if(!is.null(aprofobject$calls)){
interval <- aprofobject$interval
Finallist <- readLineDensity(aprofobject,Memprof=FALSE)
# Pretty table
CallTable<-cbind(as.character(Finallist$Line.Numbers),
Finallist$Call.Density,
Finallist$Time.Density)
if(nrow(CallTable)>1) {CallTable<-CallTable[order(CallTable[,2]),]}
rownames(CallTable)<-NULL
dimnames(CallTable)<-list(NULL, c("Line","Call Density",
"Time Density (s)"))
cat("\n Call Density and Execution time per line number:\n\n")
print.default(format(CallTable,digits = 3),print.gap = 2L,
quote = FALSE)
cat(paste("\n Totals:\n",
"Calls\t\t",Finallist$Total.Calls,"\n",
"Time (s)\t",Finallist$Total.Time,
"\t(interval = \t",interval,"(s))\n"))
if(length(Finallist$Files)>1) {
cat("\n Note: multiple files in the profiler output: \n")
print.default(Finallist$Files,print.gap = 2L,quote = FALSE)
}
if(!is.null(aprofobject$mem)){
cat("\n Memory statistics per line number:\n\n")
memtable <- readLineDensity(aprofobject,Memprof=TRUE)$Total.Mem
prettymem <- cbind(Line=names(memtable),
MB=round(as.double(memtable),3))
print.default(format(prettymem),print.gap = 2L,
quote = FALSE)
cat(paste("\n Total MBs (allocated and released).\n\n"))
}
} else {
stop("No profiler sampling information (removed?). Recreate aprof object.")
}
}
# MakeBranchPlot
#
# Incomplete function, originally meant to build
# and plot a tree showing the interdependancy
# between programs in the call stack
#
# @param calls Stack calls as returned by readOutput
# @param interval the profiler sampling interval
# @author Marco D. Visser
#
#
#Attempt to define brancing structure
MakeBranchPlot<-function(calls,interval){
############### Find stem ################
nlevel<-sapply(calls,length)
# shortest branching point
minlev<-min(nlevel)
# Tree height
maxlev<-max(nlevel)
# Number of unique branch pipes
pipes<-unique(sapply(calls, paste,collapse=" ",sep=" "))
pipesize<-table(sapply(calls, paste,collapse=" ",sep=" "))
############### define brances ################
branches<-vector(maxlev,mode="list")
for (i in seq_len(maxlev)){
branches[[i]]<-table(sapply(calls,function(X) X[i]))
}
# Build plot grid represented by a list for each branching
# level
#brance thickness
branTh<-sapply(branches,sum)
branchPropSize<-sapply(branches,function(X) X/max(branTh)*1.5)
branchSize<-sapply(branchPropSize,
function(X) ifelse(X<0.45,0.45,X))
xpos<-sapply(branches, function(x)
if(length(x)>1){seq(-1,1,length.out=length(x))}
else{0}
)
tmppos<-seq(-1,1,length.out=maxlev)
ypos<-sapply(1:maxlev,function(x)
rep(tmppos[x],length(xpos[[x]]))
)
graphics::par(mar=c(0,0,0,0))
graphics::plot(0,0,type='n')
for(i in seq_len(maxlev)){
graphics::text(xpos[[i]],ypos[[i]], names(branches[[i]]),
cex=branchSize[[i]])
}
}
#pipemodel<-function(calls){
## Number of unique branch pipes
# pipes<-unique(sapply(calls, # paste,collapse=" ",sep=" "))
# pipesize<-table(sapply(calls, # paste,collapse=" ",sep=" "))
# splitPipes<-sapply(pipes,
# function(X) strsplit(X, split = " "),USE.NAMES=F)
# NpipeElements<-sapply(splitPipes,length)
# plot(0,0,type="n",ylim=c(1,max(NpipeElements))
# for(i in 1:max(NpipeElements)){
# rnorm(1)
# }
#}
# PlotSourceCode
#
# Helper function, meant to do the actual plotting
# of sourcefile for full program of plotting
# the execution density per line (PlotExDens)
# Eventually these programs will be replace
# through the use of a aprof calls and plot.defaults.
#
# @param SourceFilename The file name (and path if not in
# the working directory) of source program.
#
# @author Marco D. Visser
#
#
PlotSourceCode<-function(SourceFilename){
CodeLines<-readLines(SourceFilename)
NCodeLines<-length(CodeLines)
CleanLines<-sapply(CodeLines,function(x)
gsub("\t", " ",x,fixed=TRUE),USE.NAMES=FALSE)
Nchar<-sapply(CleanLines,function(x)
strsplit(x,""),USE.NAMES=FALSE)
Nchar<-sapply(Nchar,function(x)
length(x),USE.NAMES=FALSE)
graphics::par(mar=c(0,0,0,0))
graphics::plot(0,0,xlim=c(-graphics::strwidth("M"),
max(Nchar)+graphics::strwidth("M")),
ylim=c(0,NCodeLines+0.5),
type='n',xaxt='n',yaxt='n',bty='n',xlab='',ylab='')
graphics::abline(h=seq_len(NCodeLines),col='white')
#Get best text size
Codewidth<-sapply(CleanLines,graphics::strwidth,USE.NAMES=FALSE)
Codeheight<-sapply(CleanLines,graphics::strheight,USE.NAMES=FALSE)
SizeText<-0.98*min(c(
diff(graphics::par("usr")[3:4])/(sum(Codeheight)*1.5),
diff(graphics::par("usr")[1:2])/(max(Codewidth)*1.1))
)
ypos<-length(CodeLines):1
graphics::text(1+graphics::strwidth("M"),ypos,
labels=CleanLines,adj=c(0,0),
cex=SizeText)
graphics::text(0+0.5*graphics::strwidth("M"),ypos,
labels=seq_len(length(CleanLines)),
adj=c(1,0),
cex=SizeText*0.90)
}
#' plot.aprof
#'
#' Plot execution time, or total MB usage when memory profiling,
#' per line of code from a previously profiled source file.
#' The plot visually shows bottlenecks in a program's execution time,
#' shown directly next to the code of the source file.
#'
#' @param x An aprof object as returned by aprof().
#' If this object contains both memory and time profiling information
#' both will be plotted (as proportions of total time and
#' total memory allocations.
#' @param y Unused and ignored at current.
#' @param \dots Additional printing arguments. Unused at current.
#'
#' @author Marco D. Visser
#' @examples
#' \dontrun{
#' # create function to profile
#' foo <- function(N){
#' preallocate<-numeric(N)
#' grow<-NULL
#' for(i in 1:N){
#' preallocate[i]<-N/(i+1)
#' grow<-c(grow,N/(i+1))
#' }
#' }
#'
#' ## save function to a source file and reload
#' dump("foo",file="foo.R")
#' source("foo.R")
#'
#' ## create file to save profiler output
#' tmp<-tempfile()
#'
#' ## Profile the function
#' Rprof(tmp,line.profiling=TRUE)
#' foo(1e4)
#' Rprof(append=FALSE)
#'
#' ## Create a aprof object
#' fooaprof<-aprof("foo.R",tmp)
#' plot(fooaprof)
#' }
#' @concept Line profiling
#' @rdname plot
#' @method plot aprof
#' @export
plot.aprof<-function(x,y,...){
aprofobject<-x
if(!is.aprof(aprofobject)){
stop("Input does not appear to be of the class \"aprof\"")}
AddMemProf<-!is.null(aprofobject$mem)
SourceFilename <- aprofobject$sourcefile
if(is.null(SourceFilename)){
stop("aprof object requires a defined source code file for plotting")}
NCodeLines<-length(readLines(SourceFilename))
LineDensity<-readLineDensity(aprofobject,Memprof=AddMemProf)
## Line reversed to correspond to source code plot
DensityData<-list(Lines=NCodeLines:1,
Time.Density=rep(0,NCodeLines))
DensityData$Time.Density[LineDensity$Line.Numbers]<-LineDensity$Time.Density
layoutmat<-matrix(c(
1,1,1,1,3,3, rep(c(2,2,2,2,4,4),10)),
byrow=TRUE,ncol=6)
graphics::layout(layoutmat)
opar<-graphics::par("mar","bg")
graphics::par(mar=c(0,0,0,0),bg='grey90')
graphics::plot(0,0,type='n',xaxt='n',yaxt='n',bty='n',xlab='',ylab='')
graphics::text(0,0.55,SourceFilename,cex=2)
graphics::segments(-.75,0,.75,0,lwd=1.2)
graphics::segments(c(-.75,.75),c(0,0),c(-.75,.75),c(-0.1,-0.1),lwd=1.2)
PlotSourceCode(SourceFilename)
graphics::plot(0,0,type='n',xaxt='n',yaxt='n',bty='n',xlab='',ylab='')
graphics::plot(DensityData$Time.Density,DensityData$Lines,
ylim=c(0,NCodeLines+0.5),
type='n',xaxt='n',yaxt='n',bty='n',xlab='',ylab='')
graphics::abline(h=seq_len(NCodeLines),col='white')
graphics::axis(3)
graphics::mtext("Density in execution time(s)",3,cex=.85,padj=-2.5)
graphics::segments(0, DensityData$Lines,
DensityData$Time.Density,DensityData$Lines
,lwd=4,col=grDevices::rgb(0,0,1,alpha=1))
graphics::points(DensityData$Time.Density,DensityData$Lines, pch=20)
if(AddMemProf){
graphics::axis(3,col="blue",lwd=2)
DensityData$MemStats <- rep(0,NCodeLines)
MemLines <- as.integer(names(LineDensity$Total.Mem))
DensityData$MemStats[MemLines]<-LineDensity$Total.Mem
DensityData$PlotStats <- DensityData$MemStats/max(DensityData$MemStats)
DensityData$PlotStats <- DensityData$PlotStats*
max(DensityData$Time.Density)
graphics::segments(0,DensityData$Lines+0.1,
DensityData$PlotStats,DensityData$Lines+0.1
,lwd=4,col=grDevices::rgb(1,0,0,alpha=1))
graphics::par(xaxt="s")
xloc <- range(DensityData$Time.Density)
graphics::axis(1,
at=c(xloc[1],xloc[2]/2,xloc[2]),
labels=round(c(0,max(DensityData$MemStats)/2,
max(DensityData$MemStats)),1),
line=-3.5,col='red',lwd=2,cex.lab=0.9)
graphics::mtext("Total memory usage (MB)",1,cex=.8,padj=-1.5)
graphics::points(DensityData$PlotStats,DensityData$Lines+.1, pch=20)
}
graphics::par(opar)
graphics::layout(1)
}
##' Line progression plot
##'
##' A profile plot describing the progression through each code
##' line during the execution of the program.
##'
##' Given that a source code file was specified in an "aprof" object
##' this function will estimate when each lines was executed. It
##' identifies the largest bottleneck and indicates this
##' on the plot with red markings (y-axis).
##' R uses a statistical profiler which, using system interrupts,
##' temporarily stops execution of a program at fixed intervals.
##' This is a profiling technique that results in samples of "the call stack"
##' every time the system was stopped. The function \code{profileplot} uses
##' these samples to reconstruct the progression through the
##' program. Note that the best results are obtained when a decent amount of
##' samples have been taken (relative to the length of the source code).
##' Use \code{print.aprof} to see how many samples (termed "Calls") of
##' the call stack were taken.
##' @param aprofobject An aprof object returned by the function
##' \code{aprof}
##' @author Marco D. Visser
##' @examples
##' \dontrun{
##' # create function to profile
##' foo <- function(N){
##' preallocate<-numeric(N)
##' grow<-NULL
##' for(i in 1:N){
##' preallocate[i]<-N/(i+1)
##' grow<-c(grow,N/(i+1))
##' }
##' }
##'
##' #save function to a source file and reload
##' dump("foo",file="foo.R")
##' source("foo.R")
##'
##' # create file to save profiler output
##' tmp<-tempfile()
##'
##' # Profile the function
##' Rprof(tmp,line.profiling=TRUE)
##' foo(1e4)
##' Rprof(append=FALSE)
##'
##' # Create a aprof object
##' fooaprof<-aprof("foo.R",tmp)
##' profileplot(fooaprof)
##' }
##' @seealso \code{\link{plot.aprof}}
##' @concept Line profiling
##' @export
profileplot <- function(aprofobject){
if(!is.aprof(aprofobject)){
stop("Input does not appear to be of the class \"aprof\"")}
SourceFilename <- aprofobject$sourcefile
if(is.null(SourceFilename)){
stop("aprof object requires a defined source code file for plotting")
}
TargetFile <- aprofobject$sourcefile
calls<-aprofobject$calls
interval <- aprofobject$interval
FileNumber<-unlist(calls)[which(unlist(calls)==TargetFile)+1]
FileNumber <- substr(FileNumber,1,1)
NCodeLines<-length(readLines(SourceFilename))
cleancalls<-sapply(calls, function(x)
gsub("#File", NA, x))
LineCalls<- unlist(sapply(cleancalls,
function(X) X[grep(paste(FileNumber,
"#",sep=''),X)]
,USE.NAMES=FALSE))
nLineCalls<-as.numeric(sapply(LineCalls,function(X)
strsplit(X,"1#")[[1]][2],USE.NAMES=FALSE))
timesteps<-seq(0,length(nLineCalls)*interval,interval)
callhistory <- c(1,nLineCalls)
LineDensity<-readLineDensity(aprofobject)
opar<-graphics::par("mar","bg")
maxtimesteps <- max(timesteps)
layoutmat<-matrix(c(rep(c(1,1,1,1,2,2),10)), byrow=TRUE,ncol=6)
graphics::layout(layoutmat)
graphics::par(mar=c(4,4,0.1,0.1),bg='grey90')
graphics::plot(0,0,xlim=c(0,maxtimesteps),ylim=c(1,NCodeLines),
type='n',xaxt='s',yaxt='s', xlab='',ylab='')
graphics::abline(h=1:NCodeLines,col='white')
graphics::mtext("Run time(s)",1,cex=.9,padj=3.4)
graphics::mtext("Line",2,cex=.9,padj=-3.4)
graphics::lines(c(timesteps,maxtimesteps), c(callhistory,NCodeLines),
lwd=2,col=grDevices::rgb(0,0,1,alpha=0.6))
graphics::text(0,1,"Start",col='red',adj=0,cex=1.2)
graphics::text(maxtimesteps,NCodeLines,"End",col='darkgreen',cex=1.2)
#largest bottlenecks
callcounts<-table(callhistory)
maxcalls<-as.numeric(names(which(callcounts==max(callcounts))))
graphics::axis(2,at=maxcalls,labels=maxcalls,col.axis='red',
lwd=1.2,col.ticks='red')
graphics::plot(0,0,ylim=c(1,NCodeLines),
xlim = c(0,max(LineDensity$Call.Density/LineDensity$Total.Calls)*1.1),
type='n',xaxt='s',yaxt='s', xlab='',ylab='')
graphics::abline(h = 1:NCodeLines, col = "white")
PerLineDensity <- numeric(NCodeLines)
PerLineDensity[LineDensity$Line.Numbers]<-LineDensity$Call.Density/
LineDensity$Total.Calls
connectedlines <- c(1:NCodeLines)-c(0,rep(.5,NCodeLines-2),0)
graphics::lines(y=connectedlines,x=PerLineDensity,type = "S",lwd=1.3)
graphics::abline(v=0,col='grey30',lty=3)
graphics::axis(4)
graphics::mtext("Line Density", 1, cex = .9, padj = 2.7)
graphics::par(opar)
graphics::layout(1)
}
#' is.aprof
#'
#' Generic lower-level function to test whether an object
#' is an aprof object.
#' @param object Object to test
#' @export
is.aprof <- function(object) {
inherits(object, "aprof")
}
## Amdahl's law
##
## function calculates the theoretical maximum
## speed up - at current scaling - of the profiled
## program using Amdahl's law.
##
## @param P proportion of the program under study
## @param S factor with which P can be sped-up
##
## @author Marco D. Visser
##
##
AmLaw<-function(P=1,S=2){
1/((1-P)+P/S)
}
#' summary.aprof, projections of code optimization gains.
#'
#' Summarizes an "aprof" object and returns a table with
#' the theoretical maximal improvement in execution
#' time for the entire profiled program when a given line
#' of code is sped-up by a factor (called S in the
#' output). Calculations are done using R's profiler
#' output, and requires line profiling to be switched on.
#' Expected improvements are estimated for the entire
#' program using Amdahl's law (Amdahl 1967), and note that
#' Calculations are subject to the scaling of the problem
#' at profiling. The table output aims to answer whether it is
#' worthwhile to spend hours of time optimizing bits of
#' code (e.g. refactoring in C) and, additionally,
#' identifies where these efforts should be focused.
#' Using aprof one can get estimates of the maximum possible
#' gain. Such considerations are important when one
#' wishes to balance development time vs execution time.
#' All predictions are subject to the scaling of the
#' problem.
#'
#' @param object An object returned by the function \code{aprof}.
#' @param \dots Additional [and unused] arguments.
#' @title Projected optimization gains using Amdahl's law.
#' @references Amdahl, Gene (1967). Validity of the Single Processor
#' Approach to Achieving Large-Scale Computing Capabilities. AFIS
#' Conference Proceedings (30): 483-485.
#' @author Marco D. Visser
#' @concept Line profiling
#' @rdname summary
#' @method summary aprof
#' @export
summary.aprof<-function(object,...){
aprofobject<-object
LineProf<-readLineDensity(aprofobject)
PropLines<-LineProf$Time.Density/LineProf$Total.Time
Speedups<-2^c(0:4)
SpeedTable<-sapply(Speedups,function(X) AmLaw(P=PropLines,S=X))
if(is.null(nrow(SpeedTable))) SpeedTable <- matrix(SpeedTable,nrow=1)
#Time improvement table
ExecTimeTable<-LineProf$Total.Time/SpeedTable
ExecTimeTable<-rbind(ExecTimeTable,LineProf$Total.Time/Speedups)
## limits of Amdahl's law as S goes to inf
SpeedTable<-cbind(SpeedTable,1/(1-PropLines))
dimnames(SpeedTable)<-list(paste("Line*:",
LineProf$Line.Numbers,":"),
c(Speedups,"S -> Inf**"))
SpeedTable<-SpeedTable[order(PropLines,decreasing=TRUE),]
dimnames(ExecTimeTable)<-list(c(paste("Line*:",
LineProf$Line.Numbers,":"),
"All lines"),Speedups)
ExecTimeTable<-ExecTimeTable[order(
c(PropLines,sum(PropLines)),
decreasing=TRUE),]
cat("Largest attainable speed-up factor for the entire program\n
when 1 line is sped-up with factor (S): \n\n")
cat("\t Speed up factor (S) of a line \n")
print.default(format(SpeedTable,digits = 3),print.gap = 2L,
quote = FALSE)
cat("\nLowest attainable execution time for the entire program when\n
lines are sped-up with factor (S):\n\n")
cat("\t Speed up factor (S) of a line \n")
print.default(format(ExecTimeTable,digits = 3),print.gap = 2L,
quote = FALSE)
cat("\n Total sampling time: ",round(LineProf$Total.Time,2) ,
" seconds")
cat("\n * Expected improvement at current scaling")
cat("\n ** Asymtotic max. improvement at current scaling\n\n")
invisible(SpeedTable)
}
#' targetedSummary
#'
#' Allows a detailed look into certain lines of code,
#' which have previously been identified as bottlenecks
#' in combination with a source file.
#'
#' @param target The specific line of code to take a detailed look
#' at. This can be identified using \code{summary.aprof}.
#' @param aprofobject object of class "aprof" returned by
#' the function \code{aprof}.
#' @param findParent Logical, should an attempt be made to find
#' the parent of a function call? E.g. "lm" would be a parent call of
#' "lm.fit" or "mean" a parent call of "mean.default".
#' Note that currently, the option only returns the most frequently
#' associated parent call when multiple unique parents exist.
#' @author Marco D. Visser
#'
#' @export
targetedSummary<-function(target=NULL,aprofobject=NULL,findParent=FALSE){
if(is.null(target)){stop("Function requires target line number")}
if(is.null(aprofobject$calls)){
stop(paste("Calls appear empty - no call stack samples.",
"Did the program run too fast? ")) }
calls <- aprofobject$calls
interval <- aprofobject$interval
if(is.null(aprofobject$sourcefile)) {
TargetFile<-"1#"
warning("sourcefile empty, assumed first file in callstack is the source")
} else {
sourcefile <- aprofobject$sourcefile
FileNumber<-unlist(calls)[which(unlist(calls)==sourcefile)+1]
TargetFile <- paste(substr(FileNumber,1,1),"#",sep="")
}
# identify all unique file names
FileNames<-unlist(calls)[which(unlist(calls)=="#File")-2]
# What was the total execution time?
TotalTime<-length(calls)*interval
# Identify lines of interest
Lcalls<-sapply(calls,function(x) gsub(TargetFile,"L",x),USE.NAMES=FALSE)
##Replace all file references with Actual file names
for(i in seq_len(length(FileNames))){
Lcalls<-sapply(Lcalls,function(x) gsub(paste(i,"#",sep='')
,paste(FileNames[i],
'#',sep=''),
x),USE.NAMES=FALSE)
}
#Subset to target line
tlines <- sapply(Lcalls,function(X) paste("L",target,sep='')%in%X)
TargetCalls<-Lcalls[tlines]
if(sum(tlines)==0){stop("Target line not found in profiler output.\n
Confirm target line and run again") }
## Remove all functions calls before target line
trimmedTargetCalls<-lapply(TargetCalls,function(X)
X[1+max(grep(paste("L",target,sep=''),
X)):length(X)])
# Count function calls
CallCounts<-table(stats::na.omit(unlist(trimmedTargetCalls)))
## Find parent call before target call?
if(findParent==TRUE) {
## Find unique parent calls for each unique call
parentCalls <- vector(mode="character", length=
length(CallCounts))
for(i in seq_len(length(CallCounts))){
parentCalls[i]<-names(
sort(table(unlist(
lapply(TargetCalls,
function(X) X[which(names(CallCounts)[i]==X)[1]-1]
))),decreasing=TRUE)[1])
}
## Sort decending and save as data.frame
CallOrder <- order(CallCounts,decreasing=TRUE)
CallCounts <- CallCounts[CallOrder]
parentCalls <- parentCalls[CallOrder]
FinalTable<-data.frame(Function=names(CallCounts),
Parent=parentCalls, Calls=CallCounts,
Time=CallCounts*interval)
} else {
CallCounts <- sort(CallCounts,decreasing=TRUE)
FinalTable <- data.frame(Function=names(CallCounts),
Calls=CallCounts,
Time=CallCounts*interval)
}
row.names(FinalTable) <- NULL
return(FinalTable)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.