# inst/rkward/rkwarddev_koRpus_plugin_script.R

# the plugin code was generated by this script
# you should not change the plugin code directly, but this script
# note: this script only creates objects in your workspace,
# *EXCEPT* for the last call, see below.

require(rkwarddev)
rkwarddev.required("0.07-4")

local({
# --- global settings used throughout this script ---
# set the output directory to overwrite the actual plugin
# (currently a temporary directory, see tempdir())
output.dir <- tempdir()
overwrite <- TRUE
# if you set guess.getter to TRUE, the resulting code will need RKWard >= 0.6.0
# (the minimum version actually declared is set in dependencies.info below)
guess.getter <- TRUE
# indent by two spaces
rk.set.indent(by="  ")
# NOTE(review): presumably controls how empty "else {}" clauses are handled
# in js() -- confirm against the rkwarddev documentation
rk.set.empty.e(TRUE)
update.translations <- TRUE

# menu location, handed to the plugin components as their "hierarchy"
menu.hierarchy <- list("analysis", "Text Analysis")

# meta information on the plugin: name, author (with roles) and a short description
about.info <- rk.XML.about(
  name="koRpus",
  author=c(
    person(given="Meik", family="Michalke",
      email="meik.michalke@hhu.de", role=c("aut","cre"))),
  about=list(desc="RKWard GUI for the koRpus package.")
  )
# dependencies of the generated plugin: the minimum RKWard version is chosen
# depending on whether getter functions are guessed (see guess.getter above)
dependencies.info <- rk.XML.dependencies(
  dependencies=list(
    # plain if/else instead of ifelse(): the condition is a single logical value
    rkward.min=if(isTRUE(guess.getter)) "0.6.3" else "0.5.6"
  )
)


# radio buttons to choose how the text is processed; the selected value
# ("file" or "fileTreeTagger") is evaluated by the logic section and the
# JavaScript code of the POS tagging dialog
operationMode <- rk.XML.radio(
  id.name="operationMode",
  label="Select operation mode",
  options=list(
    "Tokenize with tokenize()"=c(val="file", chk=TRUE),
    "Tokenize and tag with TreeTagger"=c(val="fileTreeTagger")
  )
)

# directory browser for the TreeTagger installation
TTRootDir <- rk.XML.browser(
  label="TreeTagger root folder:",
  type="dir",
  id.name="TTRootDir"
)
# explanatory text shown along with the TreeTagger folder browser
TTRootText <- rk.XML.text(
  "The TreeTagger folder is the one containing the bin, cmd and lib folders",
  id.name="TTRootText")
# dropdown list of supported text languages; the values are two-letter
# language codes, English is preselected
language <- rk.XML.dropdown(label="Text language:", options=list(
    "Dutch"=c(val="nl"),
    "English"=c(val="en", chk=TRUE),
    "French"=c(val="fr"),
    "German"=c(val="de"),
    "Italian"=c(val="it"),
    "Portuguese"=c(val="pt"),
    "Russian"=c(val="ru"),
    "Spanish"=c(val="es")
  ),
  id.name="language"
)
# file browser for the text to analyze, filtered for *.txt files
textFile <- rk.XML.browser(
  label="Text to analyze:",
  filter="*.txt",
  id.name="textFile"
)
# frame grouping the TreeTagger folder, language and text file widgets
kRp.POS.frm.TT <- rk.XML.frame(
  TTRootDir,
  TTRootText,
  language,
  textFile
)

# checkboxes for the tokenize() options; their values are complete
# "name=TRUE" snippets which the JavaScript code collects into an array
detectHeadlines <- rk.XML.cbox(
  id.name="detectHeadlines",
  label="Detect headlines (treated as sentences)",
  value="hline=TRUE"
)
detectParagraphs <- rk.XML.cbox(
  id.name="detectParagraphs",
  label="Detect paragraphs",
  value="parag=TRUE"
)
# frame holding both checkboxes
tknzOptions <- rk.XML.frame(
  detectHeadlines,
  detectParagraphs,
  id.name="tknzOptions",
  label="tokenize() options"
)

# checkbox to also print the tokenized text in the output
showTagged <- rk.XML.cbox(
  id.name="showTagged",
  label="List tokenized results in output (slow for long texts!)",
  value="tagged"
)
# frame around the checkbox
showTaggedFrame <- rk.XML.frame(showTagged, id.name="showTaggedFrame")

# widget to keep the resulting object in the workspace
saveTaggedText <- rk.XML.saveobj(
  label="Keep tagged text object",
  initial="tagged.text.obj",
  chk=TRUE,
  id.name="saveTaggedText"
)

# the POS tagging dialog: all widgets defined above, stacked in a single column
kRp.dialog.POS <- rk.XML.dialog(
  rk.XML.row(
    rk.XML.col(
      operationMode,
      kRp.POS.frm.TT,
      tknzOptions,
      rk.XML.stretch(),
      showTaggedFrame,
      saveTaggedText
    )
  ),
  label="POS Tagging"
)

## logic section
# two converters check which operation mode is selected; they govern the
# dialog as follows:
#  - TreeTagger mode: the TreeTagger folder text and browser become visible,
#    and the folder is set to required
#  - tokenize() mode: the tokenize() options frame becomes visible
kRp.POS.lgc.sect <- rk.XML.logic(
  kRp.POS.lgc.tagModeTokenize <- rk.XML.convert(sources=list(string=operationMode), mode=c(equals="file")),
  kRp.POS.lgc.tagModeTreeTagger <- rk.XML.convert(sources=list(string=operationMode), mode=c(equals="fileTreeTagger")),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootText, set="visible"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootDir, set="visible"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootDir, set="required"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTokenize, client=tknzOptions, set="visible")
)

## JavaScript
# preprocessing code: for Dutch and Portuguese texts, the generated R code
# first loads the respective language package (koRpus.lang.nl/koRpus.lang.pt);
# nothing is added for the other languages
kRp.POS.js.prep <- rk.paste.JS(
  rk.JS.vars(language),
  js(
    if(language == "nl"){
      echo("require(koRpus.lang.nl)\n")
    } else if(language == "pt"){
      echo("require(koRpus.lang.pt)\n")
    } else {}
  )
)
# JS variable "TTLang", used for the lang argument of the generated calls
kRp.POS.js.lang <- rk.JS.vars("TTLang")
# calculation code: writes either a tokenize() or a treetag() call,
# depending on the selected operation mode; in both cases the result is
# assigned to tagged.text.obj
kRp.POS.js.calc <- rk.paste.JS(
  # these are probably fetched as boolean, ensure we get the character values
  kRp.POS.js.arr.tkheadl <- rk.JS.vars(detectHeadlines, var.prefix="value"),
  kRp.POS.js.arr.tkparag <- rk.JS.vars(detectParagraphs, var.prefix="value"),
  # collect the checked tokenize() options in the "detect" array
  kRp.POS.js.array <- rk.JS.array("detect", variables=list(id(kRp.POS.js.arr.tkheadl), id(kRp.POS.js.arr.tkparag)), opt.sep=",\\n\\t"),
  # presumably results in the JS declaration of TTLang, set to the value of
  # the language dropdown -- confirm against the generated code
  js(
    id("var ", kRp.POS.js.lang, " = ", language, ";")
  ),
  js(
    if(operationMode == "file"){
      echo("tagged.text.obj <- tokenize(\n\t\"", textFile, "\",\n\tlang=\"", kRp.POS.js.lang, "\"", kRp.POS.js.array, "\n)\n\n")
    } else {
      echo("tagged.text.obj <- treetag(\n\t\"", textFile, "\",\n\ttreetagger=\"manual\",\n\tlang=\"", kRp.POS.js.lang, "\",\n\tTT.options=list(path=\"", TTRootDir, "\",\n\tpreset=\"", language, "\")\n)\n\n")
    }
  )
)

# printout code: a header naming text file and language, the summary of the
# tagged object and, if requested by the showTagged checkbox, the tagged text
kRp.POS.js.print <- rk.paste.JS(
  rk.JS.vars(textFile, language, showTagged),
  rk.JS.header("Tokenizing & POS tagging results",
    add=c("Text", textFile),
    add=c("Language", language)
  ),
  rk.JS.header("Word class distribution", level=3),
#   echo("\trk.print.literal(paste0(\"<strong>Sentences: </strong>\", describe(tagged.text.obj)$sentences))\n"),
#   echo("\trk.print.literal(paste0(\"<strong>Words: </strong>\", describe(tagged.text.obj)$words,\n\t\t",
#     "\" (\", round(describe(tagged.text.obj)$avg.sentc.length, digits=2), \" per sentence)\"))\n"),
#   echo("\trk.print.literal(paste0(\"<strong>Letters: </strong>\", describe(tagged.text.obj)$letters[[\"all\"]],\n\t\t",
#     "\" (\", round(describe(tagged.text.obj)$avg.word.length, digits=2), \" per word)\"))\n"),
  echo("rk.print(summary(tagged.text.obj))\n"),
  js(
    if(showTagged){
      rk.JS.header("Tagged text", level=3)
      echo("rk.print(taggedText(tagged.text.obj))\n")
    }
  ),
  echo("\n")
)

### hyphenation

# variable selector; it is the source for all varslots below and is reused
# in the hyphenation, readability, lexical diversity and frequency dialogs
varsHyph <- rk.XML.varselector(id.name="varsHyph")
# required slot for the tokenized text object, restricted to class kRp.tagged
varHyphenTagged <- rk.XML.varslot(
  label="Tokenized text object (valid class: kRp.tagged)",
  source=varsHyph,
  classes=c("kRp.tagged"),
  required=TRUE,
  id.name="varHyphenTagged"
)

# checkbox to also print the hyphenation results in the output
showHyphenation <- rk.XML.cbox(
  id.name="showHyphenation",
  label="List hyphenation results in output",
  value="hyph"
)
# frame around the checkbox
showHyphenationFrame <- rk.XML.frame(showHyphenation, id.name="showHyphenationFrame")
# widget to keep the resulting object in the workspace
saveHyphen <- rk.XML.saveobj(
  label="Keep hyphenated text object",
  initial="hyphenated.text.obj",
  chk=TRUE,
  id.name="saveHyphen"
)

# layout of the hyphenation dialog: variable selector on the left,
# varslot, output checkbox and save widget on the right
kRp.tab.hyph <- list(
  rk.XML.row(
    id.name="rowHyph",
    rk.XML.col(varsHyph),
    rk.XML.col(
      varHyphenTagged,
      rk.XML.stretch(),
      showHyphenationFrame,
      saveHyphen
    )
  )
)

# the dialog itself
kRp.dialog.hyph <- rk.XML.dialog(label="Hyphenation", kRp.tab.hyph)

# calculation code: writes a hyphen() call on the selected tokenized object,
# the result is assigned to hyphenated.text.obj
kRp.hyph.js.calc <- rk.paste.JS(
  echo("hyphenated.text.obj <- hyphen(\n\t", varHyphenTagged, ",\n\tquiet=TRUE\n)\n\n")
)

# printout code: prints the "hyphen" slot of the result, but only if the
# showHyphenation checkbox is checked
kRp.hyph.js.print <- rk.paste.JS(
  rk.JS.vars(showHyphenation),
  js(
    if(showHyphenation){
      echo("rk.print(hyphenated.text.obj@hyphen)\n\n")
    }
  )
)

# combine dialog and JavaScript code into the "Hyphenation" component
kRp.hyph.component <- rk.plugin.component(
  "Hyphenation",
  xml=list(dialog=kRp.dialog.hyph),
  js=list(
    calculate=kRp.hyph.js.calc,
    printout=kRp.hyph.js.print
  ),
  create=c("xml", "js"),
  hierarchy=menu.hierarchy,
  guess.getter=guess.getter,
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)


## readability


# basic readability indices, shown on the first tab of the dialog;
# all of them are checked by default, the checkbox values are the index
# names handed to readability()
readabilityIndicesFrame <- rk.XML.frame(
  rk.XML.row(
    rk.XML.col(
      ARI <- rk.XML.cbox(label="Automated Readability Index (ARI)", value="ARI", chk=TRUE, id.name="ARI"),
      ARINRI <- rk.XML.cbox(label="ARI (NRI)", value="ARI.NRI", chk=TRUE, id.name="ARINRI"),
      ColemanLiau <- rk.XML.cbox(label="Coleman-Liau", value="Coleman.Liau", chk=TRUE, id.name="ColemanLiau"),
      DanielsonBryan <- rk.XML.cbox(label="Danielson-Bryan (D 1+2)", value="Danielson.Bryan", chk=TRUE, id.name="DanielsonBryan")
    ),
    rk.XML.col(
      DickesSteiwer <- rk.XML.cbox(label="Dickes-Steiwer Handformel", value="Dickes.Steiwer", chk=TRUE, id.name="DickesSteiwer"),
      Fucks <- rk.XML.cbox(label="Fucks' Stilcharakteristik", value="Fucks", chk=TRUE, id.name="Fucks"),
      LIX <- rk.XML.cbox(label="Läsbarhetsindex (LIX)", value="LIX", chk=TRUE, id.name="LIX"),
      RIX <- rk.XML.cbox(label="Readability Index (RIX)", value="RIX", chk=TRUE, id.name="RIX")
    )
  ),
  id.name="readabilityIndicesFrame"
)




# readability indices that need syllable counts, arranged in two columns
# below an explanatory text; none of them is checked by default
readabilityNeedSylls <- rk.XML.col(
  rk.XML.text("Selecting any of these indices will automatically activate syllable count."),
  rk.XML.row(
    rk.XML.col(
      Coleman <- rk.XML.cbox(label="Coleman (C 1-4)", value="Coleman", id.name="Coleman"),
      ELF <- rk.XML.cbox(label="Easy Listening Formula", value="ELF", id.name="ELF"),
      FarrJenkinsPaterson <- rk.XML.cbox(label="Farr-Jenkins-Paterson", value="Farr.Jenkins.Paterson", id.name="FarrJenkinsPaterson"),
      FarrJenkinsPatersonPSK <- rk.XML.cbox(label="Farr-Jenkins-Paterson (Powers-Sumner-Kearl)", value="Farr.Jenkins.Paterson.PSK", id.name="FarrJenkinsPatersonPSK"),
      Flesch <- rk.XML.cbox(label="Flesch Reading Ease", value="Flesch", id.name="Flesch"),
      FleschDE <- rk.XML.cbox(label="Flesch (DE, Amstad)", value="Flesch.de", id.name="FleschDE"),
      FleschES <- rk.XML.cbox(label="Flesch (ES, Fernandez-Huerta)", value="Flesch.es", id.name="FleschES"),
      FleschSzigriszt <- rk.XML.cbox(label="Flesch (ES, Szigriszt)", value="Flesch.Szigriszt", id.name="FleschSzigriszt"),
      FleschFR <- rk.XML.cbox(label="Flesch (FR, Kandel-Moles)", value="Flesch.fr", id.name="FleschFR"),
      FleschNL <- rk.XML.cbox(label="Flesch (NL, Douma)", value="Flesch.nl", id.name="FleschNL"),
      FleschNLB <- rk.XML.cbox(label="Flesch (NL, Brouwer)", value="Flesch.nl-b", id.name="FleschNLB"),
      FleschPSK <- rk.XML.cbox(label="Flesch Reading Ease (Powers-Sumner-Kearl)", value="Flesch.PSK", id.name="FleschPSK"),
      FleschKincaid <- rk.XML.cbox(label="Flesch-Kincaid Grade Level", value="Flesch.Kincaid", id.name="FleschKincaid"),
      FOG <- rk.XML.cbox(label="FOG (Gunning)", value="FOG", id.name="FOG"),
      FOGPSK <- rk.XML.cbox(label="FOG (Powers-Sumner-Kearl)", value="FOG.PSK", id.name="FOGPSK"),
      rk.XML.stretch()
    ),
    rk.XML.col(
      FOGNRI <- rk.XML.cbox(label="FOG (NRI)", value="FOG.NRI", id.name="FOGNRI"),
      FORCAST <- rk.XML.cbox(label="FORCAST", value="FORCAST", id.name="FORCAST"),
      FORCASTRGL <- rk.XML.cbox(label="FORCAST (RGL)", value="FORCAST.RGL", id.name="FORCASTRGL"),
      TRI <- rk.XML.cbox(label="Kuntzsch's Text-Redundanz-Index", value="TRI", id.name="TRI"),
      LinsearWrite <- rk.XML.cbox(label="Linsear Write", value="Linsear.Write", id.name="LinsearWrite"),
      SMOG <- rk.XML.cbox(label="SMOG", value="SMOG", id.name="SMOG"),
      SMOGC <- rk.XML.cbox(label="SMOG (formula C)", value="SMOG.C", id.name="SMOGC"),
      SMOGsimple <- rk.XML.cbox(label="SMOG (simple)", value="SMOG.simple", id.name="SMOGsimple"),
      Qu <- rk.XML.cbox(label="SMOG (DE, »Qu«)", value="SMOG.de", id.name="Qu"),
      Strain <- rk.XML.cbox(label="Strain Index", value="Strain", id.name="Strain"),
      Tuldava <- rk.XML.cbox(label="Tuldava", value="Tuldava", id.name="Tuldava"),
      WheelerSmith <- rk.XML.cbox(label="Wheeler-Smith", value="Wheeler.Smith", id.name="WheelerSmith"),
      WheelerSmithDE <- rk.XML.cbox(label="Wheeler-Smith (DE)", value="Wheeler.Smith.de", id.name="WheelerSmithDE"),
      nWS <- rk.XML.cbox(label="Wiener Sachtextformeln (nWS 1-4)", value="nWS", id.name="nWS"),
      rk.XML.stretch()
    )
  ),
  label="Formulae that need syllable count",
  id.name="readabilityNeedSylls"
)

# readability indices that need word lists, followed by the file browsers
# for these lists; the browsers are optional here, the logic section
# enables and requires them depending on the checked indices
readabilityNeedWL <- rk.XML.col(
  rk.XML.text("If you select any of these indices you will also need to provide word lists as indicated below."),
  rk.XML.row(
    rk.XML.col(
      Bormuth <- rk.XML.cbox(label="Bormuth Mean Cloze + Grade", value="Bormuth", id.name="Bormuth"),
      DaleChall <- rk.XML.cbox(label="Dale-Chall (1995)", value="Dale.Chall", id.name="DaleChall"),
      DaleChallPSK <- rk.XML.cbox(label="Dale-Chall (Powers-Sumner-Kearl)", value="Dale.Chall.PSK", id.name="DaleChallPSK"),
      DaleChallOld <- rk.XML.cbox(label="Dale-Chall (1948)", value="Dale.Chall.old", id.name="DaleChallOld"),
      rk.XML.stretch()
    ),
    rk.XML.col(
      DRP <- rk.XML.cbox(label="Degrees of Reading Power", value="DRP", id.name="DRP"),
      HarrisJacobson <- rk.XML.cbox(label="Harris-Jacobson", value="Harris.Jacobson", id.name="HarrisJacobson"),
      Spache <- rk.XML.cbox(label="Spache", value="Spache", id.name="Spache"),
      SpacheOld <- rk.XML.cbox(label="Spache (old)", value="Spache.old", id.name="SpacheOld"),
      rk.XML.stretch()
    )
  ),
  rk.XML.row(
    rk.XML.col(
      readbWLldc <- rk.XML.browser(label="Long Dale-Chall word list (*.txt, also used for Bormuth/DRP):", filter="*.txt", required=FALSE, id.name="readbWLldc"),
      readbWLsdc <- rk.XML.browser(label="Short Dale-Chall word list (*.txt):", filter="*.txt", required=FALSE, id.name="readbWLsdc"),
      readbWLHaJa <- rk.XML.browser(label="Harris-Jacobson word list (grades 1 and 2, *.txt):", filter="*.txt", required=FALSE, id.name="readbWLHaJa")
    ),
    id.name="rowWLfile"
  ),
  id.name="readabilityNeedWL",
  label="Formulae that need word lists"
)

# widget to keep the readability results in the workspace
saveReadb <- rk.XML.saveobj(label="Keep results", initial="readability.obj", chk=TRUE, id.name="saveReadb")

# optional slot for an already hyphenated object, restricted to class kRp.hyphen
varTaggedHyphenated <- rk.XML.varslot(
  label="Hyphenated text object (optional, valid class: kRp.hyphen)",
  source=varsHyph,
  classes=c("kRp.hyphen"),
  id.name="varTaggedHyphenated"
)


# tabbook of the readability dialog: data selection and basic indices on the
# first tab, the syllable and word list dependent indices on the other two
kRp.tab.rdb <- rk.XML.tabbook(
  label="Readability",
  tabs=list(
    "Data and Basic Indices"=rk.XML.row(
      rk.XML.col(varsHyph),
      rk.XML.col(
        varHyphenTagged,
        varTaggedHyphenated,
        rk.XML.stretch(),
        readabilityIndicesFrame,
        saveReadb
      )
    ),
    "Using Syllables"=readabilityNeedSylls,
    "Using Word Lists"=readabilityNeedWL
  )
)

# the dialog itself
kRp.dialog.rdb <- rk.XML.dialog(label="Readability", kRp.tab.rdb)

# logic of the readability dialog: the word list browsers are only enabled
# (and then required) if at least one index is checked that needs the
# respective list:
#  - long Dale-Chall list: any Dale-Chall variant, Bormuth or DRP
#  - short Dale-Chall list: Spache or Spache (old)
#  - Harris-Jacobson list: Harris-Jacobson
kRp.logic.rdb <- rk.XML.logic(
  kRp.lgc.rdb.LongDCWL <- rk.XML.convert(sources=list(
      state=DaleChall,
      state=DaleChallPSK,
      state=DaleChallOld,
      state=Bormuth,
      state=DRP), mode=c(or="")),
  # sources is given as a list(), consistent with all other convert calls
  kRp.lgc.rdb.ShortDCWL <- rk.XML.convert(sources=list(
      state=Spache,
      state=SpacheOld), mode=c(or="")),
  rk.XML.connect(governor=kRp.lgc.rdb.LongDCWL, client=readbWLldc, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.rdb.LongDCWL, client=readbWLldc, set="required"),
  rk.XML.connect(governor=kRp.lgc.rdb.ShortDCWL, client=readbWLsdc, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.rdb.ShortDCWL, client=readbWLsdc, set="required"),
  rk.XML.connect(governor=HarrisJacobson, client=readbWLHaJa, set="enabled"),
  rk.XML.connect(governor=HarrisJacobson, client=readbWLHaJa, set="required")
)

# calculation code of the readability dialog; it writes a readability() call
# whose result is assigned to readability.obj:
#  1. one JS variable (prefix "value") per index checkbox
#  2. all of these variables are collected in the quoted "index" array
#  3. the word.lists option is composed from the word list browsers
#  4. the call is echoed, with the hyphen argument only if a hyphenated
#     object was selected
kRp.rdb.js.calc <- rk.paste.JS(
  ARI.val <- rk.JS.vars(ARI, var.prefix="value"),
  ARINRI.val <- rk.JS.vars(ARINRI, var.prefix="value"),
  Bormuth.val <- rk.JS.vars(Bormuth, var.prefix="value"),
  Coleman.val <- rk.JS.vars(Coleman, var.prefix="value"),
  ColemanLiau.val <- rk.JS.vars(ColemanLiau, var.prefix="value"),
  DaleChall.val <- rk.JS.vars(DaleChall, var.prefix="value"),
  DaleChallPSK.val <- rk.JS.vars(DaleChallPSK, var.prefix="value"),
  DaleChallOld.val <- rk.JS.vars(DaleChallOld, var.prefix="value"),
  DanielsonBryan.val <- rk.JS.vars(DanielsonBryan, var.prefix="value"),
  DickesSteiwer.val <- rk.JS.vars(DickesSteiwer, var.prefix="value"),
  DRP.val <- rk.JS.vars(DRP, var.prefix="value"),
  ELF.val <- rk.JS.vars(ELF, var.prefix="value"),
  FarrJenkinsPaterson.val <- rk.JS.vars(FarrJenkinsPaterson, var.prefix="value"),
  FarrJenkinsPatersonPSK.val <- rk.JS.vars(FarrJenkinsPatersonPSK, var.prefix="value"),
  Flesch.val <- rk.JS.vars(Flesch, var.prefix="value"),
  FleschES.val <- rk.JS.vars(FleschES, var.prefix="value"),
  FleschSzigriszt.val <- rk.JS.vars(FleschSzigriszt, var.prefix="value"),
  FleschNL.val <- rk.JS.vars(FleschNL, var.prefix="value"),
  FleschNLB.val <- rk.JS.vars(FleschNLB, var.prefix="value"),
  FleschDE.val <- rk.JS.vars(FleschDE, var.prefix="value"),
  FleschFR.val <- rk.JS.vars(FleschFR, var.prefix="value"),
  FleschPSK.val <- rk.JS.vars(FleschPSK, var.prefix="value"),
  FleschKincaid.val <- rk.JS.vars(FleschKincaid, var.prefix="value"),
  FOG.val <- rk.JS.vars(FOG, var.prefix="value"),
  FOGPSK.val <- rk.JS.vars(FOGPSK, var.prefix="value"),
  FOGNRI.val <- rk.JS.vars(FOGNRI, var.prefix="value"),
  FORCAST.val <- rk.JS.vars(FORCAST, var.prefix="value"),
  FORCASTRGL.val <- rk.JS.vars(FORCASTRGL, var.prefix="value"),
  Fucks.val <- rk.JS.vars(Fucks, var.prefix="value"),
  HarrisJacobson.val <- rk.JS.vars(HarrisJacobson, var.prefix="value"),
  LinsearWrite.val <- rk.JS.vars(LinsearWrite, var.prefix="value"),
  LIX.val <- rk.JS.vars(LIX, var.prefix="value"),
  nWS.val <- rk.JS.vars(nWS, var.prefix="value"),
  Qu.val <- rk.JS.vars(Qu, var.prefix="value"),
  RIX.val <- rk.JS.vars(RIX, var.prefix="value"),
  SMOG.val <- rk.JS.vars(SMOG, var.prefix="value"),
  SMOGC.val <- rk.JS.vars(SMOGC, var.prefix="value"),
  SMOGsimple.val <- rk.JS.vars(SMOGsimple, var.prefix="value"),
  Strain.val <- rk.JS.vars(Strain, var.prefix="value"),
  Spache.val <- rk.JS.vars(Spache, var.prefix="value"),
  SpacheOld.val <- rk.JS.vars(SpacheOld, var.prefix="value"),
  TRI.val <- rk.JS.vars(TRI, var.prefix="value"),
  Tuldava.val <- rk.JS.vars(Tuldava, var.prefix="value"),
  WheelerSmith.val <- rk.JS.vars(WheelerSmith, var.prefix="value"),
  WheelerSmithDE.val <- rk.JS.vars(WheelerSmithDE, var.prefix="value"),
  # the "index" array holds the values of all index checkboxes
  kRp.rdb.array <- rk.JS.array("index", variables=list(
    id(ARI.val),
    id(ARINRI.val),
    id(Bormuth.val),
    id(Coleman.val),
    id(ColemanLiau.val),
    id(DaleChall.val),
    id(DaleChallPSK.val),
    id(DaleChallOld.val),
    id(DanielsonBryan.val),
    id(DickesSteiwer.val),
    id(DRP.val),
    id(ELF.val),
    id(FarrJenkinsPaterson.val),
    id(FarrJenkinsPatersonPSK.val),
    id(Flesch.val),
    id(FleschDE.val),
    id(FleschES.val),
    id(FleschSzigriszt.val),
    id(FleschFR.val),
    id(FleschNL.val),
    id(FleschNLB.val),
    id(FleschPSK.val),
    id(FleschKincaid.val),
    id(FOG.val),
    id(FOGPSK.val),
    id(FOGNRI.val),
    id(FORCAST.val),
    id(FORCASTRGL.val),
    id(Fucks.val),
    id(HarrisJacobson.val),
    id(LinsearWrite.val),
    id(LIX.val),
    id(nWS.val),
    id(Qu.val),
    id(RIX.val),
    id(SMOG.val),
    id(SMOGC.val),
    id(SMOGsimple.val),
    id(Spache.val),
    id(SpacheOld.val),
    id(Strain.val),
    id(TRI.val),
    id(Tuldava.val),
    id(WheelerSmith.val),
    id(WheelerSmithDE.val)
  ), quote=TRUE, opt.sep=",\\n\\t"),
  # each word list entry is only added if a matching index is checked
  # and the respective file browser is not empty
  kRp.rdb.JS.wordLists <- rk.JS.options("rdbWordLists",
    # word.lists = list(Bormuth = NULL, Dale.Chall = NULL, Harris.Jacobson = NULL, Spache = NULL)
    ite(
      id("(", Bormuth, " || ", DRP, ") && ", readbWLldc),
      qp("\n\t\tBormuth=\"", readbWLldc, "\"")
    ),
    ite(
      id("(", DaleChall, " || ", DaleChallPSK, " || ", DaleChallOld, ") && ", readbWLldc),
      qp("\n\t\tDale.Chall=\"", readbWLldc, "\"")
    ),
    ite(
      id(HarrisJacobson, " && ", readbWLHaJa),
      qp("\n\t\tHarris.Jacobson=\"", readbWLHaJa, "\"")
    ),
    ite(
      id("(", Spache, " || ", SpacheOld, ") && ", readbWLsdc),
      qp("\n\t\tSpache=\"", readbWLsdc, "\"")
    ),
    option="word.lists",
    funct="list",
    opt.sep=",\\n\\t"),
  # the actual call
  echo("readability.obj <- readability(\n\t", varHyphenTagged, kRp.rdb.array),
  js(
    if(varTaggedHyphenated != ""){
      echo(",\n\thyphen=", varTaggedHyphenated)
    } else {}
  ),
  echo(kRp.rdb.JS.wordLists),
  echo(",\n\tquiet=TRUE\n)\n\n")
)

# printout code: show the summary of the readability results
kRp.rdb.js.print <- rk.paste.JS(
  echo("rk.results(summary(readability.obj))\n")
)

# combine logic, dialog and JavaScript code into the "Readability" component
kRp.rdb.component <- rk.plugin.component(
  "Readability",
  xml=list(
    logic=kRp.logic.rdb,
    dialog=kRp.dialog.rdb
  ),
  js=list(
    calculate=kRp.rdb.js.calc,
    printout=kRp.rdb.js.print
  ),
  create=c("xml", "js"),
  hierarchy=menu.hierarchy,
  guess.getter=guess.getter,
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)


## lexical diversity

# checkboxes for the measures of lexical diversity, in two columns;
# all are checked by default, the checkbox values are the measure names
# handed to lex.div()
LDIndices <- rk.XML.frame(
  rk.XML.row(
    rk.XML.col(
      TTR <- rk.XML.cbox("Type Token Ratio (TTR)", value="TTR", chk=TRUE, id.name="TTR"),
      MSTTR <- rk.XML.cbox("Mean Segmental TTR (MSTTR)", value="MSTTR", chk=TRUE, id.name="MSTTR"),
      MATTR <- rk.XML.cbox("Moving Average TTR (MATTR)", value="MATTR", chk=TRUE, id.name="MATTR"),
      Cld <- rk.XML.cbox("Herdan's C", value="C", chk=TRUE, id.name="Cld"),
      Rld <- rk.XML.cbox("Root TTR", value="R", chk=TRUE, id.name="Rld"),
      CTTR <- rk.XML.cbox("Corrected TTR (CTTR)", value="CTTR", chk=TRUE, id.name="CTTR"),
      Uld <- rk.XML.cbox("Uber Index", value="U", chk=TRUE, id.name="Uld"),
      rk.XML.stretch(),
      id.name="colLD1"
    ),
    rk.XML.col(
      Sld <- rk.XML.cbox("Summer", value="S", chk=TRUE, id.name="Sld"),
      Kld <- rk.XML.cbox("Yule's K", value="K", chk=TRUE, id.name="Kld"),
      Maas <- rk.XML.cbox("Maas (a, lg(V0))", value="Maas", chk=TRUE, id.name="Maas"),
      HDD <- rk.XML.cbox("HD-D (idealized vocd-D)", value="HD-D", chk=TRUE, id.name="HDD"),
      MTLD <- rk.XML.cbox("Measure of Textual Lexical Diversity (MTLD)", value="MTLD", chk=TRUE, id.name="MTLD"),
      MTLDMA <- rk.XML.cbox("Moving Average MTLD (MTLD-MA)", value="MTLD-MA", chk=TRUE, id.name="MTLDMA"),
      rk.XML.stretch(),
      id.name="colLD2"
    )
  ),
  label="Measures of lexical diversity",
  id.name="LDIndices"
)

# "Characteristics" tab: checkboxes to select the measures whose value is
# calculated repeatedly for growing portions of the text, and a spinbox for
# the step size between these calculations; none is checked by default
LDChars <- rk.XML.frame(
  # fixed a typo in the user-visible text ("legth" -> "length")
  rk.XML.text("The classic TTR is dependent on text length. You can examine this effect by repeatedly calculating the measures' value for growing
    portions of your text. You can then plot these characteristics."),
  rk.XML.row(
    rk.XML.col(
      TTRChar <- rk.XML.cbox("Type Token Ratio (TTR)", value="TTR", id.name="TTRChar"),
      MATTRChar <- rk.XML.cbox("Moving Average TTR (MATTR)", value="MATTR", id.name="MATTRChar"),
      CldChar <- rk.XML.cbox("Herdan's C", value="C", id.name="CldChar"),
      RldChar <- rk.XML.cbox("Root TTR", value="R", id.name="RldChar"),
      CTTRChar <- rk.XML.cbox("Corrected TTR (CTTR)", value="CTTR", id.name="CTTRChar"),
      UldChar <- rk.XML.cbox("Uber Index", value="U", id.name="UldChar"),
      rk.XML.stretch(),
      id.name="colLDChar1"
    ),
    rk.XML.col(
      SldChar <- rk.XML.cbox("Summer", value="S", id.name="SldChar"),
      KldChar <- rk.XML.cbox("Yule's K", value="K", id.name="KldChar"),
      MaasChar <- rk.XML.cbox("Maas (a, lg(V0))", value="Maas", id.name="MaasChar"),
      HDDChar <- rk.XML.cbox("HD-D (idealized vocd-D)", value="HD-D", id.name="HDDChar"),
      MTLDChar <- rk.XML.cbox("Measure of Textual Lexical Diversity (MTLD)", value="MTLD", id.name="MTLDChar"),
      MTLDMAChar <- rk.XML.cbox("Moving Average MTLD (MTLD-MA)", value="MTLD-MA", id.name="MTLDMAChar"),
      rk.XML.stretch(),
      id.name="colLDChar"
    )
  ),
  # integer spinbox (minimum 2, initial value 5) for the step size
  rk.XML.frame(
    stepSize <- rk.XML.spinbox(
      "Step size between calculations (tokens)",
      min=2,
      initial=5,
      real=FALSE,
      id.name="stepSize"
    ),
    label="Accuracy"
  ),
  label="Calculate impact of text length",
  id.name="LDChars"
)

# "Options" tab: global options plus one frame of parameters for each of
# MSTTR, MATTR, HD-D and MTLD/MTLD-MA; the parameter frames are assigned to
# objects of their own because the logic section enables them selectively
LDOptions <- rk.XML.col(
  rk.XML.row(
    rk.XML.col(
      rk.XML.frame(
        LDcaseSens <- rk.XML.cbox("Case sensitive", id.name="LDcaseSens"),
        LDlemmatize <- rk.XML.cbox("Lemmatize", id.name="LDlemmatize"),
        LDkeepTokens <- rk.XML.cbox("Keep types/tokens in result object", id.name="LDkeepTokens"),
        LDlog <- rk.XML.input("Base for logarithm (must be numeric)", initial=10, required=TRUE, size="small", id.name="LDlog"),
        rk.XML.stretch(),
        label="Global options"
      )
    ),
    rk.XML.col(
      optMSTTR <- rk.XML.frame(
        LDsegment <- rk.XML.spinbox("Segment size (tokens)", min=1, initial=100, real=FALSE, id.name="LDsegment"),
        rk.XML.stretch(),
        label="MSTTR",
        id.name="optMSTTR"
      ),
      optMATTR <- rk.XML.frame(
        LDwindow <- rk.XML.spinbox("Window size (tokens)", min=1, initial=100, real=FALSE, id.name="LDwindow"),
        rk.XML.stretch(),
        label="MATTR",
        id.name="optMATTR"
      )
    )
  ),
  rk.XML.row(
    rk.XML.col(
      optHDD <- rk.XML.frame(
        LDsampleSize <- rk.XML.spinbox("Random sample size (tokens)", min=2, initial=42, real=FALSE, id.name="LDsampleSize"),
        rk.XML.stretch(),
        label="HD-D",
        id.name="optHDD"
      )
    ),
    rk.XML.col(
      optMTLD <- rk.XML.frame(
        LDfactorSize <- rk.XML.spinbox("Factor size", min=0, max=1, initial=0.72, id.name="LDfactorSize"),
        LDminTokens <- rk.XML.spinbox("Minimum number of tokens", min=1, initial=9, real=FALSE, id.name="LDminTokens"),
        LDdetails <- rk.XML.cbox("Keep all details (slow!)", id.name="LDdetails"),
        rk.XML.stretch(),
        label="MTLD/MTLD-MA",
        id.name="optMTLD"
      )
    )
  ),
  rk.XML.stretch(),
  id.name="LDOptions"
)

# checkbox to also list the identified types in the output
showTypes <- rk.XML.cbox("List all identified types", value="types", id.name="showTypes")
# frame around the checkbox
LDShowTypesFrame <- rk.XML.frame(
  showTypes,
  id.name="LDShowTypesFrame",
  label="Output"
)

# widget to keep the lexical diversity results in the workspace
saveLD <- rk.XML.saveobj(
  label="Keep results",
  initial="lexical.diversity.obj",
  chk=TRUE,
  id.name="saveLD"
)

# tabbook of the lexical diversity dialog: data selection and measures on the
# first tab, followed by the characteristics and the options tabs
kRp.tab.ld <- rk.XML.tabbook(
  label="Lexical Diversity",
  tabs=list(
    "Data and Basic Indices"=rk.XML.row(
      rk.XML.col(varsHyph),
      rk.XML.col(
        varHyphenTagged,
        rk.XML.stretch(),
        LDIndices,
        LDShowTypesFrame,
        saveLD
      )
    ),
    "Characteristics"=LDChars,
    "Options"=LDOptions
  )
)

# the dialog itself
kRp.dialog.ld <- rk.XML.dialog(label="Lexical Diversity", kRp.tab.ld)

# logic of the lexical diversity dialog:
#  - the step size spinbox is enabled if any characteristics checkbox is checked
#  - LDkeepTokens takes over the state of showTypes and is disabled
#    while showTypes is checked
#  - the option frames for MSTTR, MATTR, HD-D and MTLD/MTLD-MA are only
#    enabled if one of the related measures or characteristics is checked
kRp.logic.ld <- rk.XML.logic(
  kRp.lgc.ld.CharSteps <- rk.XML.convert(sources=list(
      state=TTRChar,
      state=MATTRChar,
      state=CldChar,
      state=RldChar,
      state=CTTRChar,
      state=UldChar,
      state=SldChar,
      state=KldChar,
      state=MaasChar,
      state=HDDChar,
      state=MTLDChar,
      state=MTLDMAChar
    ), mode=c(or="")
  ),
  rk.XML.connect(governor=kRp.lgc.ld.CharSteps, client=stepSize, set="enabled"),
  rk.XML.connect(governor=showTypes, client=LDkeepTokens, set="state"),
  rk.XML.connect(governor=showTypes, client=LDkeepTokens, set="enabled", not=TRUE),
  kRp.lgc.ld.MATTR <- rk.XML.convert(sources=list(
      state=MATTR,
      state=MATTRChar
    ), mode=c(or="")
  ),
  kRp.lgc.ld.HDD <- rk.XML.convert(sources=list(
      state=HDD,
      state=HDDChar
    ), mode=c(or="")
  ),
  kRp.lgc.ld.MTLD <- rk.XML.convert(sources=list(
      state=MTLD,
      state=MTLDMA,
      state=MTLDChar,
      state=MTLDMAChar
    ), mode=c(or="")
  ),
  rk.XML.connect(governor=MSTTR, client=optMSTTR, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.MATTR, client=optMATTR, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.HDD, client=optHDD, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.MTLD, client=optMTLD, set="enabled")
)


# calculation code of the lexical diversity dialog; it writes a lex.div()
# call whose result is assigned to lexical.diversity.obj:
#  1. JS variables and the quoted "measure" array for the measure checkboxes
#  2. JS variables and the quoted "char" array for the characteristics
#  3. the "enabled" status of the option frames is fetched
#  4. numeric options are only written to the call if they differ from the
#     value they are compared against (these match the initial values of the
#     respective dialog widgets) and, where applicable, the option frame is
#     enabled
kRp.ld.js.calc <- rk.paste.JS(
  TTR.val <- rk.JS.vars(TTR, var.prefix="value"),
  MSTTR.val <- rk.JS.vars(MSTTR, var.prefix="value"),
  MATTR.val <- rk.JS.vars(MATTR, var.prefix="value"),
  Cld.val <- rk.JS.vars(Cld, var.prefix="value"),
  Rld.val <- rk.JS.vars(Rld, var.prefix="value"),
  CTTR.val <- rk.JS.vars(CTTR, var.prefix="value"),
  Uld.val <- rk.JS.vars(Uld, var.prefix="value"),
  Sld.val <- rk.JS.vars(Sld, var.prefix="value"),
  Kld.val <- rk.JS.vars(Kld, var.prefix="value"),
  Maas.val <- rk.JS.vars(Maas, var.prefix="value"),
  HDD.val <- rk.JS.vars(HDD, var.prefix="value"),
  MTLD.val <- rk.JS.vars(MTLD, var.prefix="value"),
  MTLDMA.val <- rk.JS.vars(MTLDMA, var.prefix="value"),
  kRp.ld.measure.array <- rk.JS.array("measure", variables=list(
    id(TTR.val),
    id(MSTTR.val),
    id(MATTR.val),
    id(Cld.val),
    id(Rld.val),
    id(CTTR.val),
    id(Uld.val),
    id(Sld.val),
    id(Kld.val),
    id(Maas.val),
    id(HDD.val),
    id(MTLD.val),
    id(MTLDMA.val)
  ), quote=TRUE, opt.sep=",\\n\\t"),
  TTRChar.val <- rk.JS.vars(TTRChar, var.prefix="value"),
  MATTRChar.val <- rk.JS.vars(MATTRChar, var.prefix="value"),
  CldChar.val <- rk.JS.vars(CldChar, var.prefix="value"),
  RldChar.val <- rk.JS.vars(RldChar, var.prefix="value"),
  CTTRChar.val <- rk.JS.vars(CTTRChar, var.prefix="value"),
  UldChar.val <- rk.JS.vars(UldChar, var.prefix="value"),
  SldChar.val <- rk.JS.vars(SldChar, var.prefix="value"),
  KldChar.val <- rk.JS.vars(KldChar, var.prefix="value"),
  MaasChar.val <- rk.JS.vars(MaasChar, var.prefix="value"),
  HDDChar.val <- rk.JS.vars(HDDChar, var.prefix="value"),
  MTLDChar.val <- rk.JS.vars(MTLDChar, var.prefix="value"),
  MTLDMAChar.val <- rk.JS.vars(MTLDMAChar, var.prefix="value"),
  kRp.ld.char.array <- rk.JS.array("char", variables=list(
    id(TTRChar.val),
    id(MATTRChar.val),
    id(CldChar.val),
    id(RldChar.val),
    id(CTTRChar.val),
    id(UldChar.val),
    id(SldChar.val),
    id(KldChar.val),
    id(MaasChar.val),
    id(HDDChar.val),
    id(MTLDChar.val),
    id(MTLDMAChar.val)
  ), quote=TRUE, opt.sep=",\\n\\t"),
  # is the respective option frame currently enabled?
  kRp.ld.status.opt.MATTR <- rk.JS.vars(optMATTR, modifiers="enabled"),
  kRp.ld.status.opt.MSTTR <- rk.JS.vars(optMSTTR, modifiers="enabled"),
  kRp.ld.status.opt.HDD <- rk.JS.vars(optHDD, modifiers="enabled"),
  kRp.ld.status.opt.MTLD <- rk.JS.vars(optMTLD, modifiers="enabled"),
  # start of the actual call
  echo("lexical.diversity.obj <- lex.div(\n\t",
    varHyphenTagged
  ),
  js(
    if(LDsegment != 100 && kRp.ld.status.opt.MSTTR){
      echo(",\n\tsegment=", LDsegment)
    } else {},
    if(kRp.ld.status.opt.MTLD){
      if(LDfactorSize != 0.72){
        echo(",\n\tfactor.size=", LDfactorSize)
      } else {}
      if(LDminTokens != 9){
        echo(",\n\tmin.tokens=", LDminTokens)
      } else {}
    } else {},
    if(LDsampleSize != 42 && kRp.ld.status.opt.HDD){
      echo(",\n\trand.sample=", LDsampleSize)
    } else {},
    if(LDwindow != 100 && kRp.ld.status.opt.MATTR){
      echo(",\n\twindow=", LDwindow)
    } else {}
  ),
  tf(LDcaseSens, opt="case.sens", level=2),
  tf(LDlemmatize, opt="lemmatize", level=2),
  js(
    if(LDdetails && kRp.ld.status.opt.MTLD){
      echo(",\n\tdetailed=TRUE")
    } else {},
    echo(kRp.ld.measure.array),
    # without any selected characteristics, char=NULL is written explicitly
    if(kRp.ld.char.array != ""){
      echo(kRp.ld.char.array)
      if(stepSize != 5){
        echo(",\n\tchar.steps=", stepSize)
      } else {}
    } else {
      echo(",\n\tchar=NULL")
    },
    if(LDlog != 10){
      echo(",\n\tlog.base=", LDlog)
    } else {}
  ),
  tf(LDkeepTokens, opt="keep.tokens", level=2),
  echo(",\n\tquiet=TRUE\n)\n\n")
)

# printout code: a header listing the option values, the summary of the
# results and, if showTypes is checked, the identified types taken from
# the "tt" slot of the result object
kRp.ld.js.print <- rk.paste.JS(
  rk.JS.vars(
    LDsegment,
    LDfactorSize,
    LDminTokens,
    LDsampleSize,
    LDwindow,
    LDcaseSens,
    LDlemmatize,
    LDdetails,
    showTypes
  ),
  rk.JS.header("Lexical diversity results",
    add=c("MSTTR segment size", LDsegment),
    add=c("MTLD factor size", LDfactorSize),
    add=c("MTLD-MA min. tokens/factor", LDminTokens),
    add=c("HD-D random sample size", LDsampleSize),
    add=c("MATTR window size", LDwindow),
    add=c("Case sensitive", LDcaseSens),
    add=c("Lemmatize", LDlemmatize),
    add=c("Keep MTLD/MTLD-MA details", LDdetails)
  ),
  echo("rk.results(summary(lexical.diversity.obj))\n"),
  js(
    if(showTypes){
      rk.JS.header("Identified types in text", level=3)
      echo("rk.print(slot(lexical.diversity.obj, \"tt\")[[\"types\"]])\n")
    } else {}
  ),
  echo("\n")
)

# make a whole component of the lex.div stuff
# results.header=FALSE: the printout code writes its own header
# guess.getter and hierarchy use the script-wide settings, consistent with
# the "Hyphenation" and "Readability" components
kRp.ld.component <- rk.plugin.component("Lexical Diversity",
  xml=list(
    logic=kRp.logic.ld,
    dialog=kRp.dialog.ld),
  js=list(results.header=FALSE,
    calculate=kRp.ld.js.calc,
    printout=kRp.ld.js.print
  ),
  guess.getter=guess.getter,
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)

## frequency analysis

# "Corpora" tab: select a corpus database format ("none" by default), the
# database file, the number of running words, and whether to keep the
# corpus object in the workspace
freqSourceFrame <- rk.XML.frame(
  corpusDB <- rk.XML.dropdown(
    label="Use corpus database",
    options=list(
      "none"=c(val="none", chk=TRUE),
      "Leipzig Corpora Collection (*-text.tar.gz) "=c(val="LCC"),
      "Celex (*.CD)"=c(val="celex")
    ),
    id.name="corpusDB"
  ),
  corpDBdir <- rk.XML.browser(label="Database file", type="file", required=FALSE, id.name="corpDBdir"),
  CelexRunWd <- rk.XML.spinbox(label="Number of running words", min=1, real=FALSE, id.name="CelexRunWd"),
  rk.XML.stretch(),
  saveCorpFrq <- rk.XML.saveobj(label="Keep corpus object", initial="corp.freq.obj", id.name="saveCorpFrq"),
  label="Frequencies from language corpora",
  id.name="freqSourceFrame"
)

# checkboxes controlling the descriptive statistics, defined on their own
# since the JavaScript sections query them directly
tfidf <- rk.XML.cbox(
  label="Term frequency/inverse document frequency statistics (tf-idf)",
  chk=TRUE,
  id.name="tfidf"
)
freqShowTypes <- rk.XML.cbox(
  label="Show frequencies of types & token (by word class)",
  value="freqWC",
  id.name="freqShowTypes"
)

# frame holding both checkboxes
kRp.frq.frm.showFrqWC <- rk.XML.frame(
  tfidf,
  freqShowTypes,
  label="Descriptive statistics",
  id.name="frameFrqWCShow"
)

# save object widget for the analysis results, checked by default
saveFrq <- rk.XML.saveobj(
  label="Keep results",
  chk=TRUE,
  initial="freq.analysis.obj",
  id.name="saveFrq"
)

# variable slot for an existing frequency object, restricted to objects of
# class kRp.corp.freq and fed from the varsHyph selector
varkRpFreqObj <- rk.XML.varslot(
  id.name="varkRpFreqObj",
  source=varsHyph,
  classes=c("kRp.corp.freq"),
  label="Analyse against frequency object (valid class: kRp.corp.freq)"
)

# layout of the "Data" tab: one column with the variable selector, a second
# one with the slots, the descriptive statistics frame and the save widget
kRp.frq.tab.data <- rk.XML.row(
  rk.XML.col(varsHyph),
  rk.XML.col(
    varHyphenTagged,
    varkRpFreqObj,
    rk.XML.stretch(),
    kRp.frq.frm.showFrqWC,
    saveFrq
  )
)

# tabbook of the frequency analysis dialog
kRp.tab.frq <- rk.XML.tabbook(
  label="Frequency Analysis",
  tabs=list(
    "Data"=kRp.frq.tab.data,
    "Corpora"=freqSourceFrame
    # currently disabled third tab:
    # "Options"=kRp.frq.col.options
  )
)

# the dialog consists of the tabbook only
kRp.dialog.frq <- rk.XML.dialog(
  kRp.tab.frq,
  label="Frequency Analysis"
)

# converted properties used as governors in the logic section below

# based on the "available" property of the frequency object slot
kRp.lgc.frq.haveFreqObject <- rk.XML.convert(
  sources=list(available=varkRpFreqObj),
  mode=c(and="")
)
# a corpus database other than "none" was selected
kRp.lgc.frq.needCorpDir <- rk.XML.convert(
  sources=list(string=corpusDB),
  mode=c(notequals="none"),
  id.name="convDrpDir"
)
# the frequency object slot is empty
kRp.lgc.frq.noFreqObject <- rk.XML.convert(
  sources=list(available=varkRpFreqObj),
  mode=c(equals=""),
  id.name="convnoFrqObj"
)
# no frequency object selected and a corpus database chosen
kRp.lgc.frq.saveCorp <- rk.XML.convert(
  sources=list(kRp.lgc.frq.noFreqObject, kRp.lgc.frq.needCorpDir),
  mode=c(and=""),
  id.name="convSvCrp"
)
# the Celex database was selected
kRp.lgc.frq.Celex <- rk.XML.convert(
  sources=list(string=corpusDB),
  mode=c(equals="celex"),
  id.name="convDrpCelex"
)

# logic section; the nodes are listed in the same order as they were defined
# when the conversions were still nested in this call
kRp.logic.frq <- rk.XML.logic(
  kRp.lgc.frq.haveFreqObject,
  rk.XML.connect(
    governor=kRp.lgc.frq.haveFreqObject,
    client=freqSourceFrame,
    not=TRUE
  ),
  kRp.lgc.frq.needCorpDir,
  rk.XML.connect(
    governor=kRp.lgc.frq.needCorpDir,
    client=corpDBdir,
    set="enabled"
  ),
  rk.XML.connect(
    governor=kRp.lgc.frq.needCorpDir,
    client=corpDBdir,
    set="required"
  ),
  rk.XML.connect(
    governor=kRp.lgc.frq.needCorpDir,
    client=saveCorpFrq,
    set="enabled"
  ),
  kRp.lgc.frq.noFreqObject,
  kRp.lgc.frq.saveCorp,
  rk.XML.connect(
    governor=kRp.lgc.frq.saveCorp,
    client=saveCorpFrq,
    set="active"
  ),
  kRp.lgc.frq.Celex,
  rk.XML.connect(
    governor=kRp.lgc.frq.Celex,
    client=CelexRunWd,
    set="enabled"
  )
)

# JavaScript for the calculate section of the frequency analysis component.
# The R code it echoes
#  - reads a corpus database into corp.freq.obj (read.corp.LCC() or
#    read.corp.celex()), but only if no frequency object was selected and a
#    database file was given,
#  - calls freq.analysis() on the object from varHyphenTagged,
#  - adds corp.freq= (either corp.freq.obj or the selected object), and
#  - adds tfidf=FALSE if the tfidf checkbox is not checked.
# NOTE(review): the code inside js() is presumably translated to JavaScript
# as written, so its structure should not be changed -- confirm against
# rkwarddev docs
kRp.frq.js.calc <- rk.paste.JS(
  js(
    if(!varkRpFreqObj){
      if(corpusDB == "LCC" && corpDBdir){
        echo("corp.freq.obj <- read.corp.LCC(\n\t\"", corpDBdir, "\"\n)\n\n")
      } else if(corpusDB == "celex" && corpDBdir){
        echo("corp.freq.obj <- read.corp.celex(\n\t\"", corpDBdir, "\",\n\trunning.words=", CelexRunWd, "\n)\n\n")
      } else {}
    } else {},
    echo("freq.analysis.obj <- freq.analysis(\n\t",
      varHyphenTagged
    ),
    if(!varkRpFreqObj){
      if(corpusDB != "none"){
        echo(",\n\tcorp.freq=corp.freq.obj")
      } else {}
    } else {
      echo(",\n\tcorp.freq=", varkRpFreqObj)
    },
    if(!tfidf){
      echo(",\n\ttfidf=FALSE")
    } else {}
  ),
  # closes the freq.analysis() call opened above
  echo("\n)\n\n")
)

# JavaScript for the printout section of the frequency analysis component:
# writes the results header, echoes R code printing the summary of
# freq.analysis.obj and, if freqShowTypes is set, a data.frame built from the
# freq.types and freq.token elements of the object's "desc" slot.
kRp.frq.js.print <- rk.paste.JS(
  # the checkbox value is needed in this section
  rk.JS.vars(freqShowTypes),
  rk.JS.header("Frequency analysis results"),
  echo("rk.print(summary(freq.analysis.obj))\n\n"),
  # optional sub-section (level 3 header) with types/token frequencies
  js(
    if(freqShowTypes){
      rk.JS.header("Frequencies of types & token (by word class)", level=3)
      echo("freqTypeToken <- data.frame(\n\t",
        "types=slot(freq.analysis.obj, \"desc\")$freq.types,\n\t",
        "token=slot(freq.analysis.obj, \"desc\")$freq.token\n)\n",
        "rk.print(freqTypeToken)\n\n")
    } else {}
  )
)

# make a whole component of the frequency analysis stuff
kRp.frq.component <- rk.plugin.component(
  "Frequency Analysis",
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  xml=list(
    logic=kRp.logic.frq,
    dialog=kRp.dialog.frq
  ),
  js=list(
    results.header=FALSE,
    calculate=kRp.frq.js.calc,
    printout=kRp.frq.js.print
  ),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)



#############
## if you run the following function call, files will be written to output.dir!
#############
# this is where it gets serious, that is, here all of the above is put together into one plugin

# Puts the main plugin (tokenizing & POS tagging) and all components together
# and writes the files to output.dir. The result is assigned with `<<-`, i.e.
# outside of the enclosing local() block.
rk.kRp.dir <<- rk.plugin.skeleton(
  about.info,
  # where and what to write
  path=output.dir,
  overwrite=overwrite,
  create=c("pmap", "xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R",
  # main plugin
  guess.getter=guess.getter,
  dependencies=dependencies.info,
  pluginmap=list(
    name="Tokenizing & POS tagging",
    hierarchy=menu.hierarchy
  ),
  xml=list(
    logic=kRp.POS.lgc.sect,
    dialog=kRp.dialog.POS
  ),
  js=list(
    results.header=FALSE,
    require="koRpus",
    preprocess=kRp.POS.js.prep,
    calculate=kRp.POS.js.calc,
    printout=kRp.POS.js.print
    # currently disabled:
    # load.silencer=var.chk.suppress
  ),
  # additional components
  components=list(
    kRp.hyph.component,
    kRp.rdb.component,
    kRp.ld.component,
    kRp.frq.component
  ),
  # further behaviour; currently disabled options: edit=TRUE, show=TRUE
  tests=FALSE,
  load=TRUE,
  hints=FALSE
)

  # update the plugin messages for the pluginmap below output.dir, if requested
  if(isTRUE(update.translations)){
    koRpus.pluginmap.file <- file.path(
      output.dir, "koRpus", "inst", "rkward", "koRpus.pluginmap"
    )
    rk.updatePluginMessages(koRpus.pluginmap.file)
  } else {}
})
# unDocUMeantIt/koRpus documentation built on May 21, 2021, 9:26 p.m.