R/RuleGenerationPreClassifier.R

Defines functions RuleGenerationPreClassifier

Documented in RuleGenerationPreClassifier

RuleGenerationPreClassifier=function(RuleSet,NamesInRuleSet,LIST2Fun,RuleDirectory=getwd(),FunctionName="FlowTypeclassify",digits=2,MatlabStyle=FALSE){
  #V = RuleGenerationPreClassifier(DecisionTree,Names,RuleDirectory,FunctionName) 
  #RuleSet=V$RuleSet;Prefix=V$Prefix;Names2Var=V$Names2Var;NamesInRuleSet=V$NamesInRuleSet;FunctionAsfString=V$FunctionAsfString
  # RuleGenerationClassifier(Data,Header) 
  # is written containing seperate descriptions of the populations in form of:
  # PopolationIndex = which(Conditions)
  #
  # INPUT
  # RuleSet        Array of Rules derived from Tree.
  # NamesInRuleSet      character array containing all the unique Names that are used in the RuleSete
  # LIST2Fun
  #
  # OPTIONAL
  # RuleDirectory        where to write the function default: 
  #                      RuleDirectory='D:\Subversion\PUB\Mdbt\Classifiers'
  #
  # FunctionName         the name of the R function for AlPODS, default:
  #                      'ALPODSclassify' 
  # OUTPUT 
  # a R function   [RuleCls,PopulationNumber] = ALPODSClassify(Data,Header) 
  #                      is written into ALPODSclassify.m
  #                      RuleCls        returns the classification in terms of Cls
  #                      PopulationNumber  returns the number of the Rule used for the classification
  # 
  # the function's text  can be constructed using the following text variables
  # RuleSet              Character Array: containing the Rules one in each line
  # Prefix               the header of the function, e.g.: RuleCls = zeros(n,1)  PopulationNumber =zeros(n,1) 
  # Names2Var            How to name the variablee e.g.: CD13=findAttrCol('CD13',Header,Data)   
  # NamesInRuleSet       character array containing all the unique Names that are used in the RuleSet
  # FunctionAsfString   Strings for pre classifier function used in the classifier function geenerating the rulecls
  #author MT 2020
  
  ## MT nun die Regeln als andwendbare Funktion ausschreiben:
  path=getwd()
  tryCatch({
    setwd(RuleDirectory)
  },error=function(e){
      warning(paste("could not change directory because ",e))
  })
  FirstLine=paste0(FunctionName,"=function(Data,Header){")
  
  ReturnLine="return(list(PopulationNumber=PopulationNumber,IndList=Indlist,UniquePopNo=UniquePopNo,ConditionNoPerRule=ConditionNoPerRule))"
  LastLine="}"
  
  FileName=paste0(FunctionName,".R")
  cat(FirstLine, file=FileName, append=FALSE)
  cat("\n", file=FileName, append=TRUE)
  
  Doku = c(paste('# V = ',FunctionName,'(Data,Header)\n'),
           '# PopulationNumber=V$PopulationNumber;UniquePopNo=V$UniquePopNo;ConditionNoPerRule=V$ConditionNoPerRule;Indlist=V$Indlist;\n',
           '# rule based population generation without RuleCls\n',
           '#\n',
           '# INPUT\n',
           '# Data[1:n,1:d]        data array of d cases with n variables\n',
           '#\n',
           '# Optional\n',
           '# Header[1:d]          Header of data if given, otherwise colnames(Data) per default\n',
           '#\n',
           '# OUTPUT\n',
           '# List V of\n',
           '# PopulationNumber[1:n]     vector of classes, d integer numbers, number i indicates subpopulation_i of m subpupulations equal to the m leafs of the tree\n',
           '#                           BEWARE: Depending on input rules are not disjunct, and then always last Indlist[[i]] is applied to generate the PopulationNumber Cls!\n',
           '# Indlist[1:m]              list of inds for each population such that PopulationNumber[Indlist[[i]]]=subpopulation_i \n',
           '# UniquePopNo[1:m]          Vector of the unqique subpopulations in data\n',
           '# ConditionNoPerRule[1:m]     Number of conditions per rule, each rule defines one subpopulation i\n',
           '# \n',
           '# V 1.1\n',
           '# generated by XAI::RuleGenerationPreClassifier.R\n')
  
  Prefix="PopulationNumber = rep(0,nrow(Data));RuleCls = rep(0,nrow(Data));"
  if(isTRUE(MatlabStyle)){
    OptionalHeader1="if(missing(Header))"
    OptionalHeader2="   Header=colnames(Data)"
  }else{
    OptionalHeader1="if(!is.null(Header))"
    OptionalHeader2="   colnames(Data)=Header"
  }

  
  cat(Doku, file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  cat(OptionalHeader1, file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  
  cat(OptionalHeader2, file=FileName, append=TRUE)
  cat(";\n", file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  cat(Prefix, file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  #Select Columns
  
  if(isTRUE(MatlabStyle)){
    Lines = vector(mode = 'character',length = length(NamesInRuleSet))
    for(i in 1:length(NamesInRuleSet)){
      Lines[i]=paste0(NamesInRuleSet[i]," = findAttrCol('",NamesInRuleSet[i],"',Header,Data)")
      cat(Lines[i], file=FileName, append=TRUE)
      cat(";\n", file=FileName, append=TRUE)
    }
  }else{
    Lines = vector(mode = 'character',length = length(NamesInRuleSet))
    for(i in 1:length(NamesInRuleSet)){
      Lines[i]=paste0(NamesInRuleSet[i]," = Data[,'",NamesInRuleSet[i],"']")
      cat(Lines[i], file=FileName, append=TRUE)
      cat(";\n", file=FileName, append=TRUE)
    }
  }
  #Rules with which pre-prepared
  cat("\n", file=FileName, append=TRUE)
  cat("#Rule Set, each Line one Rule", file=FileName, append=TRUE)
  cat(";\n", file=FileName, append=TRUE)
  for(i in 1:length(RuleSet)){
    cat(RuleSet[i], file=FileName, append=TRUE)
    cat(";\n", file=FileName, append=TRUE)
  }
  
  #Prepare Number of Rules
  PopNo=gsub("Ind","",LIST2Fun)
  RuleNo=paste0("Rule",PopNo,collapse = "','")
  
  cat("\n", file=FileName, append=TRUE)
  #LineUniquePopNo=paste0("UniquePopNo = c(",paste0(PopNo,collapse = ","),");\n")
  LineUniquePopNo=paste0("AnzahlPopulations = ",max(as.numeric(PopNo)),";UniquePopNo = 1:AnzahlPopulations;\n")
  cat(LineUniquePopNo, file=FileName, append=TRUE)
  
  NamesUniquePopNo=paste0("names(UniquePopNo) = c('",RuleNo,"')")
  if(isFALSE(MatlabStyle)){
    cat(NamesUniquePopNo, file=FileName, append=TRUE)
    cat(";\n", file=FileName, append=TRUE)
  }
  #Conditions
  NoConditionsVec=sapply(RuleSet, function(x){
    counts=stringr::str_count(x,"<")+stringr::str_count(x,">")#+stringr::str_count(x,">=")+stringr::str_count(x,"<=")
  })
  #NoConditionsVec
  NoConditionsPerRule=sapply(NoConditionsVec,sum)
  LineCondition=paste0("ConditionNoPerRule = c(",paste0(NoConditionsPerRule,collapse = ","),");\n")
  
  NamesConditions=paste0("names(ConditionNoPerRule) = c('",RuleNo,"')")
  
  cat("\n", file=FileName, append=TRUE)
  cat(LineCondition, file=FileName, append=TRUE)
  
  if(isFALSE(MatlabStyle)){
    cat(NamesConditions, file=FileName, append=TRUE)
    cat(";\n", file=FileName, append=TRUE)
  }
  
  cat("\n", file=FileName, append=TRUE)
  PopLines=c()
  for(i in 1:length(RuleSet)){
    PopLines[i]=paste0("PopulationNumber[",LIST2Fun[i],"]"," = ",PopNo[i])
    cat(PopLines[i], file=FileName, append=TRUE)
    cat(";\n", file=FileName, append=TRUE)
  }
  cat("\n", file=FileName, append=TRUE)
  LIST2FunLine=paste0("Indlist = list(",paste(LIST2Fun,collapse = ","),")")
  IndListLines=LIST2FunLine
  cat(IndListLines[1], file=FileName, append=TRUE)
  cat(";\n", file=FileName, append=TRUE)
 
 
  LinesLast=paste0("names(Indlist) = c('",RuleNo,"')")

  
  IndListLines=c(IndListLines,LinesLast)
  
  if(isFALSE(MatlabStyle)){
    cat(LinesLast, file=FileName, append=TRUE)
    cat(";\n", file=FileName, append=TRUE)
  }
  cat(ReturnLine, file=FileName, append=TRUE)
  cat("\n", file=FileName, append=TRUE)
  cat(LastLine, file=FileName, append=TRUE)
  
  setwd(path)
  FunctionAsfString=list(FirstLine=FirstLine,Prefix=Prefix,DataSelect=Lines,RuleSet=RuleSet,
                         LineUniquePopNo=LineUniquePopNo,NamesUniquePopNo=NamesUniquePopNo,
                         LineCondition=LineCondition,NoConditionsPerRule=NoConditionsPerRule,
                         PopLines=PopLines,IndListLines=IndListLines,ReturnLine=ReturnLine,
                         LastLine=LastLine)
  return(list(RuleSet=RuleSet,Prefix=Prefix,Names2Var=Lines,NamesInRuleSet=NamesInRuleSet,FunctionAsfString=FunctionAsfString))
  
}
Mthrun/dbt.FlowCytometry documentation built on June 5, 2023, 10:30 a.m.