Nothing
######################################################################################
# tdmMapDesLoad
#
#' Load the mapping files.
#'
#' Load the map files \code{"tdmMapDesign.csv"} and optionally
#' also \code{"userMapDesign.csv"} and store them in \code{tdm$map} and \code{tdm$mapUser},
#' resp. These maps are used by \code{\link{tdmMapDesApply}}. \cr
#' \code{"tdmMapDesign.csv"} is searched in the TDMR library path \code{find.package("TDMR")}.
#' (For the developer version: \code{<tdm$tdmPath>/inst}). \cr
#' \code{"userMapDesign.csv"} is searched in tdm$path (which is getwd() if the user did not define tdm$path).
#'
#' @param tdm list, needed for \code{tdm$tdmPath} and \code{tdm$path}
#' @return \code{tdm}, the modified list with new elements \code{tdm$map} and \code{tdm$mapUser}
#' @seealso \code{\link{tdmMapDesApply}}
#' @export
tdmMapDesLoad <- function(tdm=list()) {
if (is.null(tdm$tdmPath)) {
mapPath <- find.package("TDMR"); # this is for the package version
} else {
mapPath <- paste(tdm$tdmPath,"inst",sep="/"); # this is for the developer source version
}
tdmMapFile=paste(mapPath,"tdmMapDesign.csv",sep="/")
if (!file.exists(tdmMapFile)) stop(sprintf("Could not find map file %s",tdmMapFile));
tdm$map <- read.table(tdmMapFile,sep=";",header=T);
cat(sprintf("Read tdmMapDesign.csv from %s\n",mapPath));
#userMapDir=ifelse(is.null(tdm$mainFile),".",dirname(tdm$mainFile));
userMapFile=paste(tdm$path,"userMapDesign.csv",sep="");
if (file.exists(userMapFile)) {
tdm$mapUser <- read.table(userMapFile,sep=";",header=T);
cat(sprintf("Read userMapDesign.csv from %s\n",tdm$path));
} else {
tdm$mapUser <- NULL;
cat(sprintf("Note: No file userMapDesign.csv in %s\n",tdm$path));
}
tdm;
}
# tdmMapDesApply
#
#' Apply the mapping from \code{des} to \code{opts}.
#'
#' For each variable which appears in .roi (and thus in design point data frame \code{des}):
#' set its counterpart in list \code{opts} to the values of the \code{k}-th row in \code{des}.
#' For each variable not appearing: leave its counterpart in \code{opts} at its default value from \code{\link{defaultOpts}}.
#'
#' @param des design points data frame
#' @param opts list of options
#' @param k apply mapping for the \code{k}-th design point
#' @param spotConfig list, we needed here \code{spotConfig$alg.roi} and \code{envT$mapUser}, see \code{\link{tdmMapDesLoad}},
#' and in addition \code{envT$spotConfig$alg.roi}
#' @param tdm list, we need here \code{tdm$map} and \code{tdm$mapUser}
#' @return \code{opts}, the modified list of options
#' @seealso \code{\link{tdmMapDesLoad}}
#' @export
tdmMapDesApply <- function(des,opts,k,spotConfig,tdm) {
cRound <- function(n,map,x) {
x <- ifelse(map$isInt[n]==1,round(x),x);
x;
}
setMapValue <- function(n,map,des,opts,k) {
cmd = paste("if(!is.null(des$",map$roiValue[n],")) ",map$optsValue[n],"=cRound(n,map,des$",map$roiValue[n],"[k])",sep="");
# (The check "if (!is.null...)" assures that tdmMapDes will work for all different .roi files.)
eval(parse(text=cmd));
opts;
}
# check whether each parameter column in des can be found in map or mapUser (thus whether it can be mapped to a variable in opts):
pNames=row.names(spotConfig$alg.roi);
for (d in pNames) {
if (length(which(tdm$map$roiValue==d))+length(which(tdm$mapUser$roiValue==d))==0)
stop(sprintf("tdmMapDesApply: cannot find a mapping for design variable %s.\n ",d),
sprintf("Please check spelling in spotConfig$alg.roi \n"),
sprintf("or extend tdmMapDesign.csv or userMapDesign.csv appropriately!\n"));
}
if (nrow(tdm$map)>0) {
for (n in 1:nrow(tdm$map)) {
opts <- setMapValue(n,tdm$map,des,opts,k);
}
}
if (!is.null(tdm$mapUser)) {
if (nrow(tdm$mapUser)>0) {
for (n in 1:nrow(tdm$mapUser)) {
opts <- setMapValue(n,tdm$mapUser,des,opts,k);
}
}
}
# TODO: check and extend this for arbitrary class length
if (!is.null(opts$CLS.CLASSWT) && is.na(opts$CLS.CLASSWT[1])) opts$CLS.CLASSWT[1]=10;
opts;
}
######################################################################################
tdmMapDesInt <- function(des,printSummary=T,spotConfig=NULL)
{
# the following is a fix for the fact that the current SPOT version does not
# generate INTs but FLOATs for each INT in the .roi file.
# We hard-code a round() for certain names we know that they only can be INT:
if (!is.null(des$MTRY)) des$MTRY <- round(des$MTRY);
if (!is.null(des$NTREE)) des$NTREE <- round(des$NTREE);
if (!is.null(des$NODESIZE)) des$NODESIZE <- round(des$NODESIZE);
if (!is.null(des$SAMPSIZE1)) des$SAMPSIZE1 <- round(des$SAMPSIZE1);
if (!is.null(des$SAMPSIZE2)) des$SAMPSIZE2 <- round(des$SAMPSIZE2);
if (!is.null(des$SAMPSIZE3)) des$SAMPSIZE3 <- round(des$SAMPSIZE3);
if (!is.null(des$SAMPSIZE4)) des$SAMPSIZE4 <- round(des$SAMPSIZE4);
if (!is.null(des$SAMPSIZE5)) des$SAMPSIZE5 <- round(des$SAMPSIZE5);
if (!is.null(des$PRE_NPC)) des$PRE_NPC = round(des$PRE_NPC);
#
# --- TODO: infer the INT columns automatic from spotConfig's ROI
# --- (when the interface to tdmStart* is clear everywhere)
if (printSummary) print(summary(des));
des;
}
######################################################################################
# tdmMapOpts:
# helper fct for unbiasedRun_*.r, map certain parameters of opts for the unbiased
# runs, depending on parameter umode, which controls the mode of the unbiased run
# INPUT
# umode # one of { "RSUB" | "CV" | "TST" | "SP_T" }
# # ="RSUB": activate random subsampling with tdm$TST.testFrac test data
# # ="CV": activate cross validation with tdm$nfold [10] folds
# # ="TST": activate test on unseen test data (user-def'd)
# # ="SP_T": activate test on unseen test data (randomly selected by tdmReadAndSplit)
# opts # current state of parameter settings
# tdm # list, here we use the elements
# nfold # [10] value for opts$TST.NFOLD during unbiased runs with umode="CV"
# tstCol # ["TST"] value for opts$TST.COL during unbiased runs with umode="TST"
# TST.testFrac # [0.2] value for opts$TST.valiFrac during unbiased runs with umode="RSUB"
# U.trnFrac # [NULL] value for opts$TST.trnFrac during unbiased runs with umode="SP_T" or umode="TST"
# nrun # [5] value for opts$NRUN during unbiased runs
# test2.string # ["default.cutoff"] value for opts$test2.string during unbiased runs
# (the defaults in '[...]' are filled via tdmDefaultsFill, if tdm does not define them.)
# OUTPUT
# opts # enhanced state of parameter settings
######################################################################################
tdmMapOpts <- function(umode,opts,tdm)
{
tdm <- tdmDefaultsFill(tdm); # fill in default values if they are not yet set
# Now (July'2015) only branch setOpts.RSUB should be used.
# The others are deprecated (only for downward compatibility)
setOpts.RSUB <- function(opts) {
opts$TST.kind <- "rand" # select test set by random subsampling, see tdmModCreateCVindex in tdmModelingUtils.r
opts$TST.valiFrac=tdm$TST.testFrac # set this fraction of data aside for testing
opts;
}
setOpts.TST <- function(opts) {
opts$TST.kind <- "col" # select test set from column TST.COL, see tdmModCreateCVindex in tdmModelingUtils.r
if (!is.null(tdm$U.trnFrac))
opts$TST.trnFrac=tdm$U.trnFrac # Use only this fraction of the TrainVa-data for training (even when opts$TST.kind="col").
# If tdm$U.trnFrac is NULL, then tdmModCreateCVindex uses all TrainVa-data for training.
opts$TST.COL=tdm$tstCol
opts;
}
setOpts.CV <- function(opts) {
opts$TST.kind <- "cv" # select test data by CV, see tdmModCreateCVindex in tdmModelingUtils.r
opts$TST.NFOLD = tdm$nfold # number of CV-folds
opts;
}
setOpts.SP_T <- function(opts) {
# [we leave opts$TST.kind at its current value]
if (!is.null(tdm$U.trnFrac))
opts$TST.trnFrac=tdm$U.trnFrac # Set this fraction of the TrainVa-data aside for training (only for opts$TST.kind="rand").
# If tdm$U.trnFrac is NULL, then tdmModCreateCVindex sets opts$TST.trnFrac = 1-opts$TST.valiFrac.
if (!is.null(tdm$TST.valiFrac))
opts$TST.valiFrac=tdm$TST.valiFrac
opts$TST.NFOLD = tdm$nfold # number of CV-folds (only for opts$TST.kind=="cv")
opts;
}
opts <- switch(umode
, "RSUB" = setOpts.RSUB(opts)
, "CV" = setOpts.CV(opts)
, "TST" = setOpts.TST(opts)
, "SP_T" = setOpts.SP_T(opts)
, "INVALID"
);
if (opts[1]=="INVALID")
stop(sprintf("*** Invalid umode=%s ***\n",umode));
opts$NRUN = tdm$nrun # how many runs with different train & test samples - or -
# how many CV-runs, if TST.kind=="cv"
opts$test2.string=tdm$test2.string # test2, another copy of the test set, receives a special treatment and the
# special treatment is either opts$test2.string = "no postproc" or = "default cutoff"
# opts$READ.TXT = T # =T: read data from .csv and save as .Rdata, =F: read from .Rdata
opts$VERBOSE <- opts$SRF.verbose <- tdm$optsVerbosity;
opts;
}
#--- DEPRECATED, just for compatibility with older versions unbiasedBestRun_appStorm.R, unbiasedBestRun_DMC2007.R -----------------
#
tdmMapOpts.OLD <- function(umode,opts,test2.string="no postproc",nfold=10) {
tdm = list();
tdm$test2.string=test2.string;
tdm$nfold=nfold;
opts <- tdmMapOpts(umode,opts,tdm);
}
######################################################################################
# ---- tdmMapCutoff is deprecated, all is done in tdmModAdjustCutoff now. ---
# tdmMapCutoff:
# helper fct for tdmStart*.r in the classification task case
# enforce parameter constraint sum(CUTOFFi)=1 if CUTOFF2,3,4 appears in .des-file.
# The following code works up to N.c=5 [N.c = number of classes]
#
# Bug fix 01/2011: the constraint fix is refined in such a way that all constrained will
# be enforced simultaneously:
# 1) sum(cutoffs)==1
# 2) any(cutoffs) <= high ROI constraint (from spotConfig)
# 3) any(cutoffs) >= low ROI constraint (from spotConfig)
# We assume here for simplicity that the high ROI and low ROI are the same for all cutoffs.
######################################################################################
tdmMapCutoff <- function(des,k,spotConfig) {
# note that nothing needs to be done here if only des$CUTOFF1 appears (two-class problem):
# then the dependent parameter opts$CLS.cutoff[2] = 1-opts$CLS.cutoff[1] is set in tdmModAdjustCutoff
#
if (!is.null(des$CUTOFF2)) { # enforce parameter constraint if CUTOFF1,2,3,4 appears in .des-file:
hig=spotConfig$alg.roi["CUTOFF1","upper"];
low=spotConfig$alg.roi["CUTOFF1","lower"];
OLD.VER=F; # if OLD.VER==T: restore the previous version which had the low/high ROI constraint
# violation bug (but gave better results on appAcid)
constr=ifelse(OLD.VER,0,1);
# If sum(cutoff[1:(N.c-1)])>=1, we clip cutoff[1:(N.c-1] in such a way that the sum is below 1.
# Why eps? - Because RF does not allow cutoff[N.c]=0, it has to be positive (see main_DMC2007.r)
eps <- runif(1, min=0.0001, max=0.03);
csum <- des$CUTOFF1[k]+des$CUTOFF2[k];
if (!is.null(des$CUTOFF3)) csum <- csum + des$CUTOFF3[k];
if (!is.null(des$CUTOFF4)) csum <- csum + des$CUTOFF4[k];
if (csum+eps>constr) {
# If constr==1: CUTOFF1..4 sum up to something larger than 1-eps.
# There is no room left for CUTOFF[N.c], so we scale CUTOFF1..4 down.
# If constr==0 (i.e. OLD.VER==T): this if-branch is always true, we scale always.
# This means that CUTOFF[N.c]=eps and that CUTOFF1..4 will be scaled up, if csum+eps<1;
# in this case the high ROI constraint can be violated.
des$CUTOFF1[k] <- des$CUTOFF1[k]/(csum+eps);
des$CUTOFF2[k] <- des$CUTOFF2[k]/(csum+eps);
if (!is.null(des$CUTOFF3)) des$CUTOFF3[k] <- des$CUTOFF3[k]/(csum+eps);
if (!is.null(des$CUTOFF4)) des$CUTOFF4[k] <- des$CUTOFF4[k]/(csum+eps);
}
# CUTOFF[N.c] will be set in main_TASK.r
# Attention: it can (and will) happen, that 1-csum>hig, if hig is the high ROI
# constraint for CUTOFF. In this case CUTOFF[N.c]=1-csum will be larger than hig.
# We currently tolerate this; CUTOFF[N.c]=1-csum will still obey the constraint "<1".
cutoffs=c(des$CUTOFF1[k],des$CUTOFF2[k],des$CUTOFF3[k],des$CUTOFF4[k]);
# cutoffs will be shorter than 4 elements if some of the des$CUTOFF*[k] are NULL
if (!OLD.VER) {
while (any(cutoffs<low)) {
# repair violations of the low ROI constraint, if any:
# add the missing amount delta to the lowest cutoff, subtract it from the highest
cutoffs = c(cutoffs,1-sum(cutoffs));
wmin = which(cutoffs==min(cutoffs));
wmax = which(cutoffs==max(cutoffs));
delta = low-cutoffs[wmin];
cutoffs[wmin] = cutoffs[wmin]+delta;
cutoffs[wmax] = cutoffs[wmax]-delta;
cutoffs=cutoffs[1:(length(cutoffs)-1)];
}
if (!is.null(des$CUTOFF1[k])) des$CUTOFF1[k]=cutoffs[1];
if (!is.null(des$CUTOFF2[k])) des$CUTOFF2[k]=cutoffs[2];
if (!is.null(des$CUTOFF3[k])) des$CUTOFF3[k]=cutoffs[3];
if (!is.null(des$CUTOFF4[k])) des$CUTOFF4[k]=cutoffs[4];
}
if (any(cutoffs>hig)) {
msg=sprintf("Some cutoffs violate high ROI constraint %f (for k=%d)",hig,k);
warning(msg);
print(cutoffs);
}
if (any(cutoffs<low)) {
msg=sprintf("Some cutoffs violate low ROI constraint %f (for k=%d)",low,k);
warning(msg);
print(cutoffs);
}
} # if (!is.null(des$CUTOFF2))
#print(c(cutoffs,1-sum(cutoffs)));
#browser()
des;
}
tdmMapDesFromX <- function(x,envT) {
# put parameter vector x in a one-row data frame and attach param names from .roi file:
des <- as.data.frame(x);
names(des) <- rownames(envT$spotConfig$alg.roi);
# round INT columns of data frame des and print its summary (see tdmMapDesign.r):
des <- tdmMapDesInt(des,printSummary=F,envT$spotConfig);
if (!is.null(envT$res)) {
des$CONFIG = max(envT$res$CONFIG)+1;
} else des$CONFIG=1;
if (!is.null(envT$bst)) {
des$STEP = nrow(envT$bst);
} else des$STEP=0;
des$REPEATS=envT$spotConfig$replicates;
des$SEED=envT$spotConfig$alg.seed + seq(1,nrow(x));
#if (!is.null(envT$res)) des$SEED = des$SEED + nrow(envT$res);
if (is.null(des$repeatsLastConfig)) des$repeatsLastConfig=1; # dummy
des;
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.