#' From a main folder get all scores
#' In theory, the folder `mainfold` should contain the results of an abc as done by ./abcdir/abc.R
#' which, in theory again, should look like a set of subfolder `mainfoldresults/mainfoldresultsX` where X depends on the number of simulation and the way they are divided in abc.R
#'@param mainfold mainfolder with allscores
#'@param idscores the id of the score
#'@param metrics the ids of the metrics use for the score
#'@param log if true, show the name of the folder scanned
#'@return a list of list of list
getAllscores <- function(mainfold,idscores=c("kc","kokc","qc","kcrev","kokcrev"),metrics=c("depth","breadth","size"),log=T,lim=NULL){
#With all the folder we can now get all the scores
if(is.character(lim))
allfolders=lim
else
allfolders=getlistfoldsubfold(mainfold,lim=lim)
names(idscores)=idscores
names(metrics)=metrics
#That take a while. use of Rmpi could be good.
allscores=lapply(idscores,function(iscore)
{
tis=lapply(allfolders,function(j)
{
if(log)print(j)
load(file.path(j,"scores.bin"));
if(length(metrics)<2)
sapply(scores,function(tmps)
tryCatch(unlist(tmps[[iscore]][metrics]),error=function(e){res=rep(NA,length(metrics));names(res)=metrics;res})
)
else
as.data.frame(
t(
sapply(scores,function(tmps)
tryCatch(unlist(tmps[[iscore]][metrics]),error=function(e){res=rep(NA,length(metrics));names(res)=metrics;res})
)
)
)
})
if(length(metrics)<2){
res=c()
res[[metrics]]=unlist(tis)
tis=res
}
else
tis=do.call("rbind",tis)
tis
}
)
names(allscores)=idscores
return(allscores)
}
##This function is a hack when dimension of metrics and idscores == 1, that SHOULD be included in previous function as a test case
getAllscoresB <- function(mainfold,idscores=c("kc","kokc","qc","kcrev","kokcrev"),metrics=c("depth","breadth","size"),log=T,lim=NULL){
#With all the folder we can now get all the scores
if(is.character(lim))
allfolders=lim
else
allfolders=getlistfoldsubfold(mainfold,lim=lim)
names(idscores)=idscores
names(metrics)=metrics
#That take a while. use of Rmpi could be good.
tis=sapply(allfolders,function(j){
if(log)print(j)
load(file.path(j,"scores.bin"));
sapply(scores,function(at)as.numeric(at["qc"][[1]][[1]]))
})
res=c()
res[[idscores]]=c()
res[[idscores]][[metrics]]=tis
return(res)
}
#' From a main folder get all parameters
#' In theory, the folder `mainfold` should contain the results of an abc as done by ./abcdir/abc.R
#' which, in theory again, should look like a set of subfolder `mainfoldresults/mainfoldresultsX` where X depends on the number of simulation and the way they are divided in abc.R
#'@param mainfold mainfolder with allscores
#'@param log if true, show the name of the folder scanned
#'@param lim if lim is a vector of int then take the folder that have index in lim, if lim is a character, directly take lim as the list of folders
#'@param check if check is true then when check if all parameters led to good simulation (ie number of parameters -== number of score)
#' note that with check use of checkIfNumberGood should be useless
#'@return a dataframe with all parameters
getAllparameters <- function(mainfold,log=F,lim=NULL,check=T){
if(is.character(lim))
allfolders=lim
else
allfolders=getlistfoldsubfold(mainfold,lim=lim)
allparameters=lapply(allfolders,function(i)
{
if(log)print(i)
load(file.path(i,"parameters.bin"))
if(check){ ##check if all simulation went well
load(file.path(i,"scores.bin"))
o=length(scores)
if(o != length(parameters[[1]])){ #if length of parameter is different than number of score we have to rely on the id stored to make the correspondance score/parameters:
o=sapply(scores,function(u)tryCatch(u$id,error=function(e){NA})) #get ids of simulations that were stored w/ score
if(log)print(paste("num ids:",length(o)))
parameters=lapply(parameters,"[",o)
}
}
return(as.data.frame(parameters))
})
allparameters.dataframe=do.call("rbind.data.frame",allparameters)
return(allparameters.dataframe)
}
getlistfoldsubfold <- function(mainfold,lim){
allfolders= list.dirs(mainfold,recursive=F)
if(!is.null(lim))allfolders=paste0(file.path(mainfold,basename(mainfold)),lim)
return(allfolders)
}
#' Return posterior given a priori distribution, score and a number of particle to accept
#'@param allscores a list of list of score
#'@param allparameters.dataframe a dataframe with all parameters
#'@param n an integer
#'@return a list of list, with for each score and each metrics a dataframe with subset of the parameter for with score of the simulation is in top `n`
getAllposteriors <- function(allscores,allparameters.dataframe,n=500)
lapply(allscores,lapply,function(u)allparameters.dataframe[rank(u,ties="first")<n,])
#' Return the best set of parameter given a score an
#'@param allscores a list of list of score
#'@param allparameters.dataframe a dataframe with all parameters
#'@param s id of the function use to compute the score
#'@param m metric use to compute the score
#'@return the list of parameter that gave the best score
getbest <- function(allscores,allparameters.dataframe,s,m)
unlist(allparameters.dataframe[which(rank(allscores[[s]][[m]],ties="first") == 1),])
#' Wrapper for Cascades3D that automatically convert a list of arguments
#'@param listofparameters a list of parameters normally given to cascades3D
#'@return same as cascades3D
cascades3D.list <- function(listofparameters,metrics=c("size","depth","breadth")){
if(length(listofparameters)==9)
cascades3D(
log=F,
N=listofparameters["N"],
stime=listofparameters["stime"],
dtime=listofparameters["dtime"],
IC=listofparameters["IC"],
Nmax=listofparameters["Nmax"],
mu_c=listofparameters["mu_c"],
R=listofparameters["R"],
time=listofparameters["repetition"],
captl=listofparameters["captl"],
utility = rep(0,listofparameters["R"]),
metrics=metrics,
betadistrib = rep(0,listofparameters["N"])
)
else
{
prior_beta=c(-100,-10,0,10,100)
prior_utility=c(-1,0,1)
util=grep("utility.*",names(listofparameters))
betad=grep("betaDistrib.*",names(listofparameters))
cascades3D(
log=F,
metrics=metrics,
N=listofparameters["N"],
stime=listofparameters["stime"],
dtime=listofparameters["dtime"],
IC=listofparameters["IC"],
Nmax=listofparameters["Nmax"],
R=listofparameters["R"],
time=listofparameters["repetition"],
captl=listofparameters["captl"],
utility = generateDistribeFromFrequencies(prior_utility,listofparameters["R"],listofparameters[util]),
betadistrib = generateDistribeFromFrequencies(prior_beta,listofparameters["N"],listofparameters[betad])
)
}
}
#' Wrapper for random cascade
#'@param listofparameters a list of parameters normally given to cascades3D
#'@return same as randomCascades
randomCascades.list <- function(listofparameters,...){
z=list(...)
alberto=NULL
if(!is.null(z$alberto))alberto=z$alberto
if(length(listofparameters) == 6){ #topfive
print(paste0("random cascade topfive alberto=",alberto))
return(randomCascades(
Nmin=listofparameters["Nmin"],
mu=listofparameters["mu"],
t_step=listofparameters["t_step"],
tau=listofparameters["tau"],
conformity=F,
topfive=T,
C=listofparameters["C"],
TF=listofparameters["TF"],
...
))
}
if(length(listofparameters) == 5){#conformity
print(paste0("random cascade conformity"))
return(randomCascades(
Nmin=listofparameters["Nmin"],
mu=listofparameters["mu"],
t_step=listofparameters["t_step"],
tau=listofparameters["tau"],
conformity=T,
beta=listofparameters["beta"],
...
))
}
if(length(listofparameters) == 4){#random
print(paste0("random cascade neutral"))
return(randomCascades(
Nmin=listofparameters["Nmin"],
mu=listofparameters["mu"],
t_step=listofparameters["t_step"],
tau=listofparameters["tau"],
topfive=F,
conformity=F,
...
))
}
}
#' go within a folder with result and extract the good simulation
#'@param ind integer of the indice (ie id) of the simulation
#'@param expefold folder within which we have to look
#'@param full boolean saying is the full simulation need to be retrieves
#'@return a list with the score and the list of parameters
getSimuGivenIndices <- function(ind,expefold,full=F){
i=1
totest=0:(length(list.files("../testRumorSize/"))-1)
for(t in totest){
load(file.path(expefold,paste0(basename(expefold),t),"scores.bin"))
for(s in 1:length(scores)){
if(ind == i){
if(full){
load(file.path(expefold,paste0(basename(expefold),t),"fullsim",paste0("res_sim_",s,".bin")))
return(rud)
}
else{
load(file.path(expefold,paste0(basename(expefold),t),"parameters.bin"))
return(list(scores=scores[s],param=sapply(parameters,"[[",s)))
}
}
i=i+1
}
}
}
#'Function that helps to run new test to check if score correspond do parameters.
#' allow to be sure that parameters and scores read from the folders make sense.
#' @param numberparam number of parameter that will be tested
#' @param rep number of time the parameter set will be repeted
fromTestScoresVSParam <- function(theparam,thescores,numberparam=20,repet=10,modelwrapper=cascades3D.list,data=allru$size,scorefun=quantilediff,rumor=F){
stopifnot(length(thescores)== nrow(theparam))
allsample=lapply(sample(length(thescores),numberparam),function(j)
{
nrep=replicate(repet,
{
simu=modelwrapper(unlist(theparam[j,]))
if(rumor)rum=getRumors(simu)
else rum=simu$size
score=scorefun(rum,data)
print(score)
res=c()
res$simu=simu
res$score=score
res$id=j
res
} )
return(nrep)
}
)
allsample.df=do.call("rbind.data.frame",lapply(allsample,function(at)as.data.frame(t(at[2:3,]))))
allsample.df$score=unlist(allsample.df$score)
allsample.df$id=unlist(allsample.df$id)
allsample.df$simscore=thescores[allsample.df$id]
allsample.df.mean=tapply(allsample.df$score,allsample.df$id,mean)
allsample.df$imean=allsample.df.mean[as.character(allsample.df$id)]
plot(allsample.df$score ~ allsample.df$imean,ylim=range(allsample.df$score,allsample.df$simscore))
points(allsample.df$simscore ~ allsample.df$imean,col="red")
}
#' Same before but just return list of the simulation rerun
#' @param numberparam number of parameter that will be tested
#' @param rep number of time the parameter set will be repeted
reruns <- function(theposterior,thescores=NULL,samplepost=100,repet=10,modelwrapper=cascades3D.list,data=allru$size,scorefun=quantilediff,rumor=F,type="all",log=F,...){
##To set the color scale I use the trick of calcualting all the simulation first and then attribue the color. Ungly so far.
print(samplepost)
#print(paste0("alberto=",alberto))
if(type == "all" | type == "sampling"){
if(type == "sampling"){
resample=sample.int(nrow(theposterior),samplepost,replace=T)
repet=1
}
else
resample=sample.int(nrow(theposterior),samplepost)
resampledpost=theposterior[resample,]
if(!is.null(thescores)){
resamplescore=thescores[resample]
orderpost=resampledpost[order(resamplescore,decreasing=T),]
}
orderpost=resampledpost
}
else if(type == "median")
orderpost=as.data.frame(lapply(theposterior,median))
else if(type == "best")
orderpost=as.data.frame(theposterior[which.max(thescores),])
replpost=lapply(1:nrow(orderpost),function(r)
{
p=orderpost[r,]
return(lapply(1:repet,function(o)
{
if(log)print(o)
sims=tryCatch(modelwrapper(unlist(p),...),error=function(e){print(e);NULL})
if(is.null(sims))return(list(score=NA,distrib=NA))
if(rumor) rums=getRumors(sims)
else rums=sims$size
sc=scorefun(rums,data)
return(list(score=sc,distrib=rums))
}
)
)
})
return(replpost)
}
#' Same before but print distribution function that helps to run test to check if score correspond do parameters.
#' and then
#' @param numberparam number of parameter that will be tested
#' @param rep number of time the parameter set will be repeted
#' @param clrs if "score" the curves are colored givent the score function used
#' @param ... parameters transmitted to plot
plotPosteriorsCheck <- function(theposterior,thescores,samplepost=100,repet=10,modelwrapper=cascades3D.list,data=allru$size,scorefun=quantilediff,rumor=F,type="all",clrs="score",alberto=F,...){
reruns(theposterior,thescores,samplepost=100,repet=10,modelwrapper=modelwrapper,data=allru$size,scorefun=quantilediff,rumor=F,type="all",clrs="score",alberto=alberto)
plotCCFD(data,...)
if(clrs == "score"){
print(replpost)
allscores=unlist(lapply(replpost,lapply,"[[","score"))
#rrs=range(allscores)
#scale=(rrs[2]-rrs[1])/100
#spacescore=seq(rrs[1],rrs[2],scale)
#cols=topo.colors(length(spacescore))
cols=topo.colors(length(unique(allscores)))
names(cols)=sort(unique(allscores))
na=lapply(replpost,lapply,function(s)pointsCCFD(s$distrib,col=alpha(cols[as.character(s$score)],.6)))
}
else
lapply(replpost,lapply,function(s)pointsCCFD(s$distrib,col=clrs))
return(replpost)
}
#' Take a folder name and return the list of id of the folders that have exact same number of parameters and scores
#' @param folder the name of a folder as the one used in getlistfoldsubfold
checkIfNumberGood <- function(folder,inv=T){
alllength=lapply(getlistfoldsubfold(folder,lim=NULL),function(i){tryCatch({load(file.path(i,"parameters.bin"));load(file.path(i,"scores.bin"));c(i,length(parameters[[1]]),length(scores))},error=function(e)c(i,0,-1))})
if(inv)return(unlist(lapply(1:length(alllength),function(i){s=alllength[[i]];if(s[2]==s[3])s[1]})))
else return(unlist(lapply(1:length(alllength),function(i){s=alllength[[i]];if(s[2]!=s[3])s[1]})))
return(rem)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.