Nothing
# the plugin code was generated by this script
# you should not change the plugin code directly, but this script
# note: this script only creates objects in your workspace,
# *EXCEPT* for the last call, see below.
require(rkwarddev)
rkwarddev.required("0.07-4")
local({
# files are written below output.dir; replace tempdir() with the plugin's
# source directory to overwrite the actual plugin
output.dir <- tempdir()
overwrite <- TRUE
# guess.getter=TRUE raises the minimum RKWard version needed by the generated
# code, see dependencies.info below
guess.getter <- TRUE
rk.set.indent(by=" ")
rk.set.empty.e(TRUE)
update.translations <- TRUE
# menu location of the plugin dialogs
menu.hierarchy <- list("analysis", "Text Analysis")

# plugin meta data
kRp.authors <- c(
  person(
    given="Meik",
    family="Michalke",
    email="meik.michalke@hhu.de",
    role=c("aut","cre")
  )
)
about.info <- rk.XML.about(
  name="koRpus",
  author=kRp.authors,
  about=list(desc="RKWard GUI for the koRpus package.")
)

# the minimum RKWard version depends on whether getter functions are guessed
rkward.min.version <- if(isTRUE(guess.getter)) "0.6.3" else "0.5.6"
dependencies.info <- rk.XML.dependencies(
  dependencies=list(rkward.min=rkward.min.version)
)
## dialog elements for tokenizing & POS tagging
# switch between plain tokenize() and TreeTagger
operationMode <- rk.XML.radio(
  label="Select operation mode",
  options=list(
    "Tokenize with tokenize()"=c(val="file", chk=TRUE),
    "Tokenize and tag with TreeTagger"=c(val="fileTreeTagger")
  ),
  id.name="operationMode"
)

# TreeTagger location, text language and the text file itself
TTRootDir <- rk.XML.browser(label="TreeTagger root folder:", type="dir", id.name="TTRootDir")
TTRootText <- rk.XML.text(
  "The TreeTagger folder is the one containing the bin, cmd and lib folders",
  id.name="TTRootText"
)
language <- rk.XML.dropdown(
  label="Text language:",
  options=list(
    "Dutch"=c(val="nl"),
    "English"=c(val="en", chk=TRUE),
    "French"=c(val="fr"),
    "German"=c(val="de"),
    "Italian"=c(val="it"),
    "Portuguese"=c(val="pt"),
    "Russian"=c(val="ru"),
    "Spanish"=c(val="es")
  ),
  id.name="language"
)
textFile <- rk.XML.browser(label="Text to analyze:", filter="*.txt", id.name="textFile")
kRp.POS.frm.TT <- rk.XML.frame(TTRootDir, TTRootText, language, textFile)

# options that are handed over to tokenize()
detectHeadlines <- rk.XML.cbox(
  label="Detect headlines (treated as sentences)",
  value="hline=TRUE",
  id.name="detectHeadlines"
)
detectParagraphs <- rk.XML.cbox(
  label="Detect paragraphs",
  value="parag=TRUE",
  id.name="detectParagraphs"
)
tknzOptions <- rk.XML.frame(
  detectHeadlines,
  detectParagraphs,
  label="tokenize() options",
  id.name="tknzOptions"
)

# output options
showTagged <- rk.XML.cbox(
  label="List tokenized results in output (slow for long texts!)",
  value="tagged",
  id.name="showTagged"
)
showTaggedFrame <- rk.XML.frame(showTagged, id.name="showTaggedFrame")
saveTaggedText <- rk.XML.saveobj(
  label="Keep tagged text object",
  initial="tagged.text.obj",
  chk=TRUE,
  id.name="saveTaggedText"
)

# the dialog is a single column holding all of the above
kRp.dialog.POS <- rk.XML.dialog(
  rk.XML.row(
    rk.XML.col(
      operationMode,
      kRp.POS.frm.TT,
      tknzOptions,
      rk.XML.stretch(),
      showTaggedFrame,
      saveTaggedText
    )
  ),
  label="POS Tagging"
)
## logic section
# one boolean property per operation mode
kRp.POS.lgc.tagModeTokenize <- rk.XML.convert(
  sources=list(string=operationMode),
  mode=c(equals="file")
)
kRp.POS.lgc.tagModeTreeTagger <- rk.XML.convert(
  sources=list(string=operationMode),
  mode=c(equals="fileTreeTagger")
)
# the TreeTagger widgets are governed by the TreeTagger mode,
# the tokenize() options by the tokenize mode
kRp.POS.lgc.sect <- rk.XML.logic(
  kRp.POS.lgc.tagModeTokenize,
  kRp.POS.lgc.tagModeTreeTagger,
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootText, set="visible"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootDir, set="visible"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTreeTagger, client=TTRootDir, set="required"),
  rk.XML.connect(governor=kRp.POS.lgc.tagModeTokenize, client=tknzOptions, set="visible")
)
## JavaScript
# preprocess code: for Dutch ("nl") and Portuguese ("pt") a require() call for
# the matching koRpus.lang.* package is written to the generated R code;
# all other languages produce no extra output here
kRp.POS.js.prep <- rk.paste.JS(
rk.JS.vars(language),
js(
if(language == "nl"){
echo("require(koRpus.lang.nl)\n")
} else if(language == "pt"){
echo("require(koRpus.lang.pt)\n")
} else {}
)
)
# JavaScript variable "TTLang"; it is assigned the value of the language
# dropdown in the calculate code below
kRp.POS.js.lang <- rk.JS.vars("TTLang")
# calculate code: depending on operationMode the generated R code calls either
# tokenize() or treetag(); in both cases the result is assigned to tagged.text.obj
kRp.POS.js.calc <- rk.paste.JS(
# these are probably fetched as boolean, ensure we get the character values
kRp.POS.js.arr.tkheadl <- rk.JS.vars(detectHeadlines, var.prefix="value"),
kRp.POS.js.arr.tkparag <- rk.JS.vars(detectParagraphs, var.prefix="value"),
# JS array "detect", built from the two checkbox variables defined above
kRp.POS.js.array <- rk.JS.array("detect", variables=list(id(kRp.POS.js.arr.tkheadl), id(kRp.POS.js.arr.tkparag)), opt.sep=",\\n\\t"),
js(
id("var ", kRp.POS.js.lang, " = ", language, ";")
),
# the "detect" array is only appended to the tokenize() call;
# the treetag() call gets the TreeTagger path and language preset instead
js(
if(operationMode == "file"){
echo("tagged.text.obj <- tokenize(\n\t\"", textFile, "\",\n\tlang=\"", kRp.POS.js.lang, "\"", kRp.POS.js.array, "\n)\n\n")
} else {
echo("tagged.text.obj <- treetag(\n\t\"", textFile, "\",\n\ttreetagger=\"manual\",\n\tlang=\"", kRp.POS.js.lang, "\",\n\tTT.options=list(path=\"", TTRootDir, "\",\n\tpreset=\"", language, "\")\n)\n\n")
}
)
)
# printout code: a header naming text file and language, followed by
# summary(tagged.text.obj); the tagged text itself is only printed if
# showTagged is checked
kRp.POS.js.print <- rk.paste.JS(
rk.JS.vars(textFile, language, showTagged),
rk.JS.header("Tokenizing & POS tagging results",
add=c("Text", textFile),
add=c("Language", language)
),
rk.JS.header("Word class distribution", level=3),
# disabled alternative output (sentences, words and letters taken from describe()):
# echo("\trk.print.literal(paste0(\"<strong>Sentences: </strong>\", describe(tagged.text.obj)$sentences))\n"),
# echo("\trk.print.literal(paste0(\"<strong>Words: </strong>\", describe(tagged.text.obj)$words,\n\t\t",
# "\" (\", round(describe(tagged.text.obj)$avg.sentc.length, digits=2), \" per sentence)\"))\n"),
# echo("\trk.print.literal(paste0(\"<strong>Letters: </strong>\", describe(tagged.text.obj)$letters[[\"all\"]],\n\t\t",
# "\" (\", round(describe(tagged.text.obj)$avg.word.length, digits=2), \" per word)\"))\n"),
echo("rk.print(summary(tagged.text.obj))\n"),
js(
if(showTagged){
rk.JS.header("Tagged text", level=3)
echo("rk.print(taggedText(tagged.text.obj))\n")
}
),
echo("\n")
)
### hyphenation
## dialog
# object selection: one slot that accepts kRp.tagged objects
varsHyph <- rk.XML.varselector(id.name="varsHyph")
varHyphenTagged <- rk.XML.varslot(
  label="Tokenized text object (valid class: kRp.tagged)",
  source=varsHyph,
  classes="kRp.tagged",
  required=TRUE,
  id.name="varHyphenTagged"
)

# output options
showHyphenation <- rk.XML.cbox(
  label="List hyphenation results in output",
  value="hyph",
  id.name="showHyphenation"
)
showHyphenationFrame <- rk.XML.frame(showHyphenation, id.name="showHyphenationFrame")
saveHyphen <- rk.XML.saveobj(
  label="Keep hyphenated text object",
  initial="hyphenated.text.obj",
  chk=TRUE,
  id.name="saveHyphen"
)

# layout: selector on the left, slot and options on the right
kRp.hyph.col.select <- rk.XML.col(varsHyph)
kRp.hyph.col.options <- rk.XML.col(
  varHyphenTagged,
  rk.XML.stretch(),
  showHyphenationFrame,
  saveHyphen
)
kRp.tab.hyph <- list(
  rk.XML.row(kRp.hyph.col.select, kRp.hyph.col.options, id.name="rowHyph")
)
kRp.dialog.hyph <- rk.XML.dialog(kRp.tab.hyph, label="Hyphenation")

## JavaScript
# the generated R code calls hyphen() on the selected object
kRp.hyph.js.calc <- rk.paste.JS(
  echo("hyphenated.text.obj <- hyphen(\n\t", varHyphenTagged, ",\n\tquiet=TRUE\n)\n\n")
)
# the hyphen slot is only printed if showHyphenation is checked
kRp.hyph.js.print <- rk.paste.JS(
  rk.JS.vars(showHyphenation),
  js(
    if(showHyphenation){
      echo("rk.print(hyphenated.text.obj@hyphen)\n\n")
    }
  )
)

## component
kRp.hyph.component <- rk.plugin.component(
  "Hyphenation",
  xml=list(dialog=kRp.dialog.hyph),
  js=list(
    calculate=kRp.hyph.js.calc,
    printout=kRp.hyph.js.print
  ),
  guess.getter=guess.getter,
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)
## readability
# all widgets are defined first and arranged into the layout afterwards

# indices that are checked by default
ARI <- rk.XML.cbox(label="Automated Readability Index (ARI)", value="ARI", chk=TRUE, id.name="ARI")
ARINRI <- rk.XML.cbox(label="ARI (NRI)", value="ARI.NRI", chk=TRUE, id.name="ARINRI")
ColemanLiau <- rk.XML.cbox(label="Coleman-Liau", value="Coleman.Liau", chk=TRUE, id.name="ColemanLiau")
DanielsonBryan <- rk.XML.cbox(label="Danielson-Bryan (D 1+2)", value="Danielson.Bryan", chk=TRUE, id.name="DanielsonBryan")
DickesSteiwer <- rk.XML.cbox(label="Dickes-Steiwer Handformel", value="Dickes.Steiwer", chk=TRUE, id.name="DickesSteiwer")
Fucks <- rk.XML.cbox(label="Fucks' Stilcharakteristik", value="Fucks", chk=TRUE, id.name="Fucks")
LIX <- rk.XML.cbox(label="Läsbarhetsindex (LIX)", value="LIX", chk=TRUE, id.name="LIX")
RIX <- rk.XML.cbox(label="Readability Index (RIX)", value="RIX", chk=TRUE, id.name="RIX")
readabilityIndicesFrame <- rk.XML.frame(
  rk.XML.row(
    rk.XML.col(ARI, ARINRI, ColemanLiau, DanielsonBryan),
    rk.XML.col(DickesSteiwer, Fucks, LIX, RIX)
  ),
  id.name="readabilityIndicesFrame"
)

# indices on the "Using Syllables" tab, first column
Coleman <- rk.XML.cbox(label="Coleman (C 1-4)", value="Coleman", id.name="Coleman")
ELF <- rk.XML.cbox(label="Easy Listening Formula", value="ELF", id.name="ELF")
FarrJenkinsPaterson <- rk.XML.cbox(label="Farr-Jenkins-Paterson", value="Farr.Jenkins.Paterson", id.name="FarrJenkinsPaterson")
FarrJenkinsPatersonPSK <- rk.XML.cbox(label="Farr-Jenkins-Paterson (Powers-Sumner-Kearl)", value="Farr.Jenkins.Paterson.PSK", id.name="FarrJenkinsPatersonPSK")
Flesch <- rk.XML.cbox(label="Flesch Reading Ease", value="Flesch", id.name="Flesch")
FleschDE <- rk.XML.cbox(label="Flesch (DE, Amstad)", value="Flesch.de", id.name="FleschDE")
FleschES <- rk.XML.cbox(label="Flesch (ES, Fernandez-Huerta)", value="Flesch.es", id.name="FleschES")
FleschSzigriszt <- rk.XML.cbox(label="Flesch (ES, Szigriszt)", value="Flesch.Szigriszt", id.name="FleschSzigriszt")
FleschFR <- rk.XML.cbox(label="Flesch (FR, Kandel-Moles)", value="Flesch.fr", id.name="FleschFR")
FleschNL <- rk.XML.cbox(label="Flesch (NL, Douma)", value="Flesch.nl", id.name="FleschNL")
FleschNLB <- rk.XML.cbox(label="Flesch (NL, Brouwer)", value="Flesch.nl-b", id.name="FleschNLB")
FleschPSK <- rk.XML.cbox(label="Flesch Reading Ease (Powers-Sumner-Kearl)", value="Flesch.PSK", id.name="FleschPSK")
FleschKincaid <- rk.XML.cbox(label="Flesch-Kincaid Grade Level", value="Flesch.Kincaid", id.name="FleschKincaid")
FOG <- rk.XML.cbox(label="FOG (Gunning)", value="FOG", id.name="FOG")
FOGPSK <- rk.XML.cbox(label="FOG (Powers-Sumner-Kearl)", value="FOG.PSK", id.name="FOGPSK")
# indices on the "Using Syllables" tab, second column
FOGNRI <- rk.XML.cbox(label="FOG (NRI)", value="FOG.NRI", id.name="FOGNRI")
FORCAST <- rk.XML.cbox(label="FORCAST", value="FORCAST", id.name="FORCAST")
FORCASTRGL <- rk.XML.cbox(label="FORCAST (RGL)", value="FORCAST.RGL", id.name="FORCASTRGL")
TRI <- rk.XML.cbox(label="Kuntzsch's Text-Redundanz-Index", value="TRI", id.name="TRI")
LinsearWrite <- rk.XML.cbox(label="Linsear Write", value="Linsear.Write", id.name="LinsearWrite")
SMOG <- rk.XML.cbox(label="SMOG", value="SMOG", id.name="SMOG")
SMOGC <- rk.XML.cbox(label="SMOG (formula C)", value="SMOG.C", id.name="SMOGC")
SMOGsimple <- rk.XML.cbox(label="SMOG (simple)", value="SMOG.simple", id.name="SMOGsimple")
Qu <- rk.XML.cbox(label="SMOG (DE, »Qu«)", value="SMOG.de", id.name="Qu")
Strain <- rk.XML.cbox(label="Strain Index", value="Strain", id.name="Strain")
Tuldava <- rk.XML.cbox(label="Tuldava", value="Tuldava", id.name="Tuldava")
WheelerSmith <- rk.XML.cbox(label="Wheeler-Smith", value="Wheeler.Smith", id.name="WheelerSmith")
WheelerSmithDE <- rk.XML.cbox(label="Wheeler-Smith (DE)", value="Wheeler.Smith.de", id.name="WheelerSmithDE")
nWS <- rk.XML.cbox(label="Wiener Sachtextformeln (nWS 1-4)", value="nWS", id.name="nWS")
readabilityNeedSylls <- rk.XML.col(
  rk.XML.text("Selecting any of these indices will automatically activate syllable count."),
  rk.XML.row(
    rk.XML.col(
      Coleman,
      ELF,
      FarrJenkinsPaterson,
      FarrJenkinsPatersonPSK,
      Flesch,
      FleschDE,
      FleschES,
      FleschSzigriszt,
      FleschFR,
      FleschNL,
      FleschNLB,
      FleschPSK,
      FleschKincaid,
      FOG,
      FOGPSK,
      rk.XML.stretch()
    ),
    rk.XML.col(
      FOGNRI,
      FORCAST,
      FORCASTRGL,
      TRI,
      LinsearWrite,
      SMOG,
      SMOGC,
      SMOGsimple,
      Qu,
      Strain,
      Tuldava,
      WheelerSmith,
      WheelerSmithDE,
      nWS,
      rk.XML.stretch()
    )
  ),
  label="Formulae that need syllable count",
  id.name="readabilityNeedSylls"
)

# indices on the "Using Word Lists" tab and the file browsers for the lists
Bormuth <- rk.XML.cbox(label="Bormuth Mean Cloze + Grade", value="Bormuth", id.name="Bormuth")
DaleChall <- rk.XML.cbox(label="Dale-Chall (1995)", value="Dale.Chall", id.name="DaleChall")
DaleChallPSK <- rk.XML.cbox(label="Dale-Chall (Powers-Sumner-Kearl)", value="Dale.Chall.PSK", id.name="DaleChallPSK")
DaleChallOld <- rk.XML.cbox(label="Dale-Chall (1948)", value="Dale.Chall.old", id.name="DaleChallOld")
DRP <- rk.XML.cbox(label="Degrees of Reading Power", value="DRP", id.name="DRP")
HarrisJacobson <- rk.XML.cbox(label="Harris-Jacobson", value="Harris.Jacobson", id.name="HarrisJacobson")
Spache <- rk.XML.cbox(label="Spache", value="Spache", id.name="Spache")
SpacheOld <- rk.XML.cbox(label="Spache (old)", value="Spache.old", id.name="SpacheOld")
readbWLldc <- rk.XML.browser(label="Long Dale-Chall word list (*.txt, also used for Bormuth/DRP):", filter="*.txt", required=FALSE, id.name="readbWLldc")
readbWLsdc <- rk.XML.browser(label="Short Dale-Chall word list (*.txt):", filter="*.txt", required=FALSE, id.name="readbWLsdc")
readbWLHaJa <- rk.XML.browser(label="Harris-Jacobson word list (grades 1 and 2, *.txt):", filter="*.txt", required=FALSE, id.name="readbWLHaJa")
readabilityNeedWL <- rk.XML.col(
  rk.XML.text("If you select any of these indices you will also need to provide word lists as indicated below."),
  rk.XML.row(
    rk.XML.col(Bormuth, DaleChall, DaleChallPSK, DaleChallOld, rk.XML.stretch()),
    rk.XML.col(DRP, HarrisJacobson, Spache, SpacheOld, rk.XML.stretch())
  ),
  rk.XML.row(
    rk.XML.col(readbWLldc, readbWLsdc, readbWLHaJa),
    id.name="rowWLfile"
  ),
  id.name="readabilityNeedWL",
  label="Formulae that need word lists"
)

# result object and optional hyphenated input
saveReadb <- rk.XML.saveobj(label="Keep results", initial="readability.obj", chk=TRUE, id.name="saveReadb")
varTaggedHyphenated <- rk.XML.varslot(
  label="Hyphenated text object (optional, valid class: kRp.hyphen)",
  source=varsHyph,
  classes="kRp.hyphen",
  id.name="varTaggedHyphenated"
)

# three tabs: data and default indices, syllable based indices, word list based indices
kRp.rdb.tab.data <- rk.XML.row(
  rk.XML.col(varsHyph),
  rk.XML.col(
    varHyphenTagged,
    varTaggedHyphenated,
    rk.XML.stretch(),
    readabilityIndicesFrame,
    saveReadb
  )
)
kRp.tab.rdb <- rk.XML.tabbook(
  label="Readability",
  tabs=list(
    "Data and Basic Indices"=kRp.rdb.tab.data,
    "Using Syllables"=readabilityNeedSylls,
    "Using Word Lists"=readabilityNeedWL
  )
)
kRp.dialog.rdb <- rk.XML.dialog(kRp.tab.rdb, label="Readability")
# Logic for the readability dialog: each word list browser is enabled and
# required only while at least one index that uses it is checked.
# true if any index using the long Dale-Chall list is checked
kRp.lgc.rdb.LongDCWL <- rk.XML.convert(
  sources=list(
    state=DaleChall,
    state=DaleChallPSK,
    state=DaleChallOld,
    state=Bormuth,
    state=DRP
  ),
  mode=c(or="")
)
# true if any index using the short Dale-Chall list is checked;
# sources is given as a list() like in every other rk.XML.convert() call of this script
kRp.lgc.rdb.ShortDCWL <- rk.XML.convert(
  sources=list(
    state=Spache,
    state=SpacheOld
  ),
  mode=c(or="")
)
kRp.logic.rdb <- rk.XML.logic(
  kRp.lgc.rdb.LongDCWL,
  kRp.lgc.rdb.ShortDCWL,
  rk.XML.connect(governor=kRp.lgc.rdb.LongDCWL, client=readbWLldc, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.rdb.LongDCWL, client=readbWLldc, set="required"),
  rk.XML.connect(governor=kRp.lgc.rdb.ShortDCWL, client=readbWLsdc, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.rdb.ShortDCWL, client=readbWLsdc, set="required"),
  # the Harris-Jacobson list depends on a single checkbox, no convert needed
  rk.XML.connect(governor=HarrisJacobson, client=readbWLHaJa, set="enabled"),
  rk.XML.connect(governor=HarrisJacobson, client=readbWLHaJa, set="required")
)
# calculate code for the readability dialog: the generated R code calls
# readability() on the selected text object and assigns the result to readability.obj
kRp.rdb.js.calc <- rk.paste.JS(
# one JS variable per index checkbox, defined with var.prefix="value"
ARI.val <- rk.JS.vars(ARI, var.prefix="value"),
ARINRI.val <- rk.JS.vars(ARINRI, var.prefix="value"),
Bormuth.val <- rk.JS.vars(Bormuth, var.prefix="value"),
Coleman.val <- rk.JS.vars(Coleman, var.prefix="value"),
ColemanLiau.val <- rk.JS.vars(ColemanLiau, var.prefix="value"),
DaleChall.val <- rk.JS.vars(DaleChall, var.prefix="value"),
DaleChallPSK.val <- rk.JS.vars(DaleChallPSK, var.prefix="value"),
DaleChallOld.val <- rk.JS.vars(DaleChallOld, var.prefix="value"),
DanielsonBryan.val <- rk.JS.vars(DanielsonBryan, var.prefix="value"),
DickesSteiwer.val <- rk.JS.vars(DickesSteiwer, var.prefix="value"),
DRP.val <- rk.JS.vars(DRP, var.prefix="value"),
ELF.val <- rk.JS.vars(ELF, var.prefix="value"),
FarrJenkinsPaterson.val <- rk.JS.vars(FarrJenkinsPaterson, var.prefix="value"),
FarrJenkinsPatersonPSK.val <- rk.JS.vars(FarrJenkinsPatersonPSK, var.prefix="value"),
Flesch.val <- rk.JS.vars(Flesch, var.prefix="value"),
FleschES.val <- rk.JS.vars(FleschES, var.prefix="value"),
FleschSzigriszt.val <- rk.JS.vars(FleschSzigriszt, var.prefix="value"),
FleschNL.val <- rk.JS.vars(FleschNL, var.prefix="value"),
FleschNLB.val <- rk.JS.vars(FleschNLB, var.prefix="value"),
FleschDE.val <- rk.JS.vars(FleschDE, var.prefix="value"),
FleschFR.val <- rk.JS.vars(FleschFR, var.prefix="value"),
FleschPSK.val <- rk.JS.vars(FleschPSK, var.prefix="value"),
FleschKincaid.val <- rk.JS.vars(FleschKincaid, var.prefix="value"),
FOG.val <- rk.JS.vars(FOG, var.prefix="value"),
FOGPSK.val <- rk.JS.vars(FOGPSK, var.prefix="value"),
FOGNRI.val <- rk.JS.vars(FOGNRI, var.prefix="value"),
FORCAST.val <- rk.JS.vars(FORCAST, var.prefix="value"),
FORCASTRGL.val <- rk.JS.vars(FORCASTRGL, var.prefix="value"),
Fucks.val <- rk.JS.vars(Fucks, var.prefix="value"),
HarrisJacobson.val <- rk.JS.vars(HarrisJacobson, var.prefix="value"),
LinsearWrite.val <- rk.JS.vars(LinsearWrite, var.prefix="value"),
LIX.val <- rk.JS.vars(LIX, var.prefix="value"),
nWS.val <- rk.JS.vars(nWS, var.prefix="value"),
Qu.val <- rk.JS.vars(Qu, var.prefix="value"),
RIX.val <- rk.JS.vars(RIX, var.prefix="value"),
SMOG.val <- rk.JS.vars(SMOG, var.prefix="value"),
SMOGC.val <- rk.JS.vars(SMOGC, var.prefix="value"),
SMOGsimple.val <- rk.JS.vars(SMOGsimple, var.prefix="value"),
Strain.val <- rk.JS.vars(Strain, var.prefix="value"),
Spache.val <- rk.JS.vars(Spache, var.prefix="value"),
SpacheOld.val <- rk.JS.vars(SpacheOld, var.prefix="value"),
TRI.val <- rk.JS.vars(TRI, var.prefix="value"),
Tuldava.val <- rk.JS.vars(Tuldava, var.prefix="value"),
WheelerSmith.val <- rk.JS.vars(WheelerSmith, var.prefix="value"),
WheelerSmithDE.val <- rk.JS.vars(WheelerSmithDE, var.prefix="value"),
# JS array "index", built from all of the checkbox variables above
kRp.rdb.array <- rk.JS.array("index", variables=list(
id(ARI.val),
id(ARINRI.val),
id(Bormuth.val),
id(Coleman.val),
id(ColemanLiau.val),
id(DaleChall.val),
id(DaleChallPSK.val),
id(DaleChallOld.val),
id(DanielsonBryan.val),
id(DickesSteiwer.val),
id(DRP.val),
id(ELF.val),
id(FarrJenkinsPaterson.val),
id(FarrJenkinsPatersonPSK.val),
id(Flesch.val),
id(FleschDE.val),
id(FleschES.val),
id(FleschSzigriszt.val),
id(FleschFR.val),
id(FleschNL.val),
id(FleschNLB.val),
id(FleschPSK.val),
id(FleschKincaid.val),
id(FOG.val),
id(FOGPSK.val),
id(FOGNRI.val),
id(FORCAST.val),
id(FORCASTRGL.val),
id(Fucks.val),
id(HarrisJacobson.val),
id(LinsearWrite.val),
id(LIX.val),
id(nWS.val),
id(Qu.val),
id(RIX.val),
id(SMOG.val),
id(SMOGC.val),
id(SMOGsimple.val),
id(Spache.val),
id(SpacheOld.val),
id(Strain.val),
id(TRI.val),
id(Tuldava.val),
id(WheelerSmith.val),
id(WheelerSmithDE.val)
), quote=TRUE, opt.sep=",\\n\\t"),
# "word.lists" option: an entry is only added if at least one of the related
# indices is checked and the matching file browser is not empty; the long
# Dale-Chall list is used for both the Bormuth and the Dale.Chall entry
kRp.rdb.JS.wordLists <- rk.JS.options("rdbWordLists",
# word.lists = list(Bormuth = NULL, Dale.Chall = NULL, Harris.Jacobson = NULL, Spache = NULL)
ite(
id("(", Bormuth, " || ", DRP, ") && ", readbWLldc),
qp("\n\t\tBormuth=\"", readbWLldc, "\"")
),
ite(
id("(", DaleChall, " || ", DaleChallPSK, " || ", DaleChallOld, ") && ", readbWLldc),
qp("\n\t\tDale.Chall=\"", readbWLldc, "\"")
),
ite(
id(HarrisJacobson, " && ", readbWLHaJa),
qp("\n\t\tHarris.Jacobson=\"", readbWLHaJa, "\"")
),
ite(
id("(", Spache, " || ", SpacheOld, ") && ", readbWLsdc),
qp("\n\t\tSpache=\"", readbWLsdc, "\"")
),
option="word.lists",
funct="list",
opt.sep=",\\n\\t"),
# assemble the call: text object, index array, optional hyphen object, word lists
echo("readability.obj <- readability(\n\t", varHyphenTagged, kRp.rdb.array),
js(
if(varTaggedHyphenated != ""){
echo(",\n\thyphen=", varTaggedHyphenated)
} else {}
),
echo(kRp.rdb.JS.wordLists),
echo(",\n\tquiet=TRUE\n)\n\n")
)
# printout code: the summary of the readability object goes to the output
kRp.rdb.js.print <- rk.paste.JS(echo("rk.results(summary(readability.obj))\n"))

# combine logic, dialog and JavaScript into the readability component
kRp.rdb.xml <- list(
  logic=kRp.logic.rdb,
  dialog=kRp.dialog.rdb
)
kRp.rdb.js <- list(
  calculate=kRp.rdb.js.calc,
  printout=kRp.rdb.js.print
)
kRp.rdb.component <- rk.plugin.component(
  "Readability",
  xml=kRp.rdb.xml,
  js=kRp.rdb.js,
  guess.getter=guess.getter,
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)
## lexical diversity
# one checkbox per measure, all of them checked by default
TTR <- rk.XML.cbox(label="Type Token Ratio (TTR)", value="TTR", chk=TRUE, id.name="TTR")
MSTTR <- rk.XML.cbox(label="Mean Segmental TTR (MSTTR)", value="MSTTR", chk=TRUE, id.name="MSTTR")
MATTR <- rk.XML.cbox(label="Moving Average TTR (MATTR)", value="MATTR", chk=TRUE, id.name="MATTR")
Cld <- rk.XML.cbox(label="Herdan's C", value="C", chk=TRUE, id.name="Cld")
Rld <- rk.XML.cbox(label="Root TTR", value="R", chk=TRUE, id.name="Rld")
CTTR <- rk.XML.cbox(label="Corrected TTR (CTTR)", value="CTTR", chk=TRUE, id.name="CTTR")
Uld <- rk.XML.cbox(label="Uber Index", value="U", chk=TRUE, id.name="Uld")
Sld <- rk.XML.cbox(label="Summer", value="S", chk=TRUE, id.name="Sld")
Kld <- rk.XML.cbox(label="Yule's K", value="K", chk=TRUE, id.name="Kld")
Maas <- rk.XML.cbox(label="Maas (a, lg(V0))", value="Maas", chk=TRUE, id.name="Maas")
HDD <- rk.XML.cbox(label="HD-D (idealized vocd-D)", value="HD-D", chk=TRUE, id.name="HDD")
MTLD <- rk.XML.cbox(label="Measure of Textual Lexical Diversity (MTLD)", value="MTLD", chk=TRUE, id.name="MTLD")
MTLDMA <- rk.XML.cbox(label="Moving Average MTLD (MTLD-MA)", value="MTLD-MA", chk=TRUE, id.name="MTLDMA")

# the measures are arranged in two columns inside a labelled frame
LDIndices <- rk.XML.frame(
  rk.XML.row(
    rk.XML.col(
      TTR, MSTTR, MATTR, Cld, Rld, CTTR, Uld,
      rk.XML.stretch(),
      id.name="colLD1"
    ),
    rk.XML.col(
      Sld, Kld, Maas, HDD, MTLD, MTLDMA,
      rk.XML.stretch(),
      id.name="colLD2"
    )
  ),
  label="Measures of lexical diversity",
  id.name="LDIndices"
)
# "Characteristics" tab of the lexical diversity dialog: one checkbox per
# measure (none checked by default) plus the step size between calculations.
# The explanatory text had the typo "legth"; its embedded line break is kept.
TTRChar <- rk.XML.cbox("Type Token Ratio (TTR)", value="TTR", id.name="TTRChar")
MATTRChar <- rk.XML.cbox("Moving Average TTR (MATTR)", value="MATTR", id.name="MATTRChar")
CldChar <- rk.XML.cbox("Herdan's C", value="C", id.name="CldChar")
RldChar <- rk.XML.cbox("Root TTR", value="R", id.name="RldChar")
CTTRChar <- rk.XML.cbox("Corrected TTR (CTTR)", value="CTTR", id.name="CTTRChar")
UldChar <- rk.XML.cbox("Uber Index", value="U", id.name="UldChar")
SldChar <- rk.XML.cbox("Summer", value="S", id.name="SldChar")
KldChar <- rk.XML.cbox("Yule's K", value="K", id.name="KldChar")
MaasChar <- rk.XML.cbox("Maas (a, lg(V0))", value="Maas", id.name="MaasChar")
HDDChar <- rk.XML.cbox("HD-D (idealized vocd-D)", value="HD-D", id.name="HDDChar")
MTLDChar <- rk.XML.cbox("Measure of Textual Lexical Diversity (MTLD)", value="MTLD", id.name="MTLDChar")
MTLDMAChar <- rk.XML.cbox("Moving Average MTLD (MTLD-MA)", value="MTLD-MA", id.name="MTLDMAChar")
# number of tokens between two calculations, whole numbers only
stepSize <- rk.XML.spinbox(
  "Step size between calculations (tokens)",
  min=2,
  initial=5,
  real=FALSE,
  id.name="stepSize"
)
LDChars <- rk.XML.frame(
  rk.XML.text("The classic TTR is dependent on text length. You can examine this effect by repeatedly calculating the measures' value for growing
portions of your text. You can then plot these characteristics."),
  rk.XML.row(
    rk.XML.col(
      TTRChar,
      MATTRChar,
      CldChar,
      RldChar,
      CTTRChar,
      UldChar,
      rk.XML.stretch(),
      id.name="colLDChar1"
    ),
    rk.XML.col(
      SldChar,
      KldChar,
      MaasChar,
      HDDChar,
      MTLDChar,
      MTLDMAChar,
      rk.XML.stretch(),
      id.name="colLDChar"
    )
  ),
  rk.XML.frame(
    stepSize,
    label="Accuracy"
  ),
  label="Calculate impact of text length",
  id.name="LDChars"
)
# "Options" tab: global options and one frame of options per group of measures
LDcaseSens <- rk.XML.cbox(label="Case sensitive", id.name="LDcaseSens")
LDlemmatize <- rk.XML.cbox(label="Lemmatize", id.name="LDlemmatize")
LDkeepTokens <- rk.XML.cbox(label="Keep types/tokens in result object", id.name="LDkeepTokens")
LDlog <- rk.XML.input(label="Base for logarithm (must be numeric)", initial=10, required=TRUE, size="small", id.name="LDlog")
LDglobalOptions <- rk.XML.frame(
  LDcaseSens,
  LDlemmatize,
  LDkeepTokens,
  LDlog,
  rk.XML.stretch(),
  label="Global options"
)

LDsegment <- rk.XML.spinbox(label="Segment size (tokens)", min=1, initial=100, real=FALSE, id.name="LDsegment")
optMSTTR <- rk.XML.frame(
  LDsegment,
  rk.XML.stretch(),
  label="MSTTR",
  id.name="optMSTTR"
)

LDwindow <- rk.XML.spinbox(label="Window size (tokens)", min=1, initial=100, real=FALSE, id.name="LDwindow")
optMATTR <- rk.XML.frame(
  LDwindow,
  rk.XML.stretch(),
  label="MATTR",
  id.name="optMATTR"
)

LDsampleSize <- rk.XML.spinbox(label="Random sample size (tokens)", min=2, initial=42, real=FALSE, id.name="LDsampleSize")
optHDD <- rk.XML.frame(
  LDsampleSize,
  rk.XML.stretch(),
  label="HD-D",
  id.name="optHDD"
)

LDfactorSize <- rk.XML.spinbox(label="Factor size", min=0, max=1, initial=0.72, id.name="LDfactorSize")
LDminTokens <- rk.XML.spinbox(label="Minimum number of tokens", min=1, initial=9, real=FALSE, id.name="LDminTokens")
LDdetails <- rk.XML.cbox(label="Keep all details (slow!)", id.name="LDdetails")
optMTLD <- rk.XML.frame(
  LDfactorSize,
  LDminTokens,
  LDdetails,
  rk.XML.stretch(),
  label="MTLD/MTLD-MA",
  id.name="optMTLD"
)

# two rows with two columns each
LDOptions <- rk.XML.col(
  rk.XML.row(
    rk.XML.col(LDglobalOptions),
    rk.XML.col(optMSTTR, optMATTR)
  ),
  rk.XML.row(
    rk.XML.col(optHDD),
    rk.XML.col(optMTLD)
  ),
  rk.XML.stretch(),
  id.name="LDOptions"
)

# output options and result object
showTypes <- rk.XML.cbox(label="List all identified types", value="types", id.name="showTypes")
LDShowTypesFrame <- rk.XML.frame(
  showTypes,
  label="Output",
  id.name="LDShowTypesFrame"
)
saveLD <- rk.XML.saveobj(label="Keep results", initial="lexical.diversity.obj", chk=TRUE, id.name="saveLD")

# three tabs: data and measures, characteristics, options
kRp.ld.tab.data <- rk.XML.row(
  rk.XML.col(varsHyph),
  rk.XML.col(
    varHyphenTagged,
    rk.XML.stretch(),
    LDIndices,
    LDShowTypesFrame,
    saveLD
  )
)
kRp.tab.ld <- rk.XML.tabbook(
  label="Lexical Diversity",
  tabs=list(
    "Data and Basic Indices"=kRp.ld.tab.data,
    "Characteristics"=LDChars,
    "Options"=LDOptions
  )
)
kRp.dialog.ld <- rk.XML.dialog(kRp.tab.ld, label="Lexical Diversity")
# logic of the lexical diversity dialog
# true if any characteristics checkbox is checked
kRp.lgc.ld.CharSteps <- rk.XML.convert(
  sources=list(
    state=TTRChar,
    state=MATTRChar,
    state=CldChar,
    state=RldChar,
    state=CTTRChar,
    state=UldChar,
    state=SldChar,
    state=KldChar,
    state=MaasChar,
    state=HDDChar,
    state=MTLDChar,
    state=MTLDMAChar
  ),
  mode=c(or="")
)
# true if the measure is checked as an index or as a characteristic
kRp.lgc.ld.MATTR <- rk.XML.convert(
  sources=list(state=MATTR, state=MATTRChar),
  mode=c(or="")
)
kRp.lgc.ld.HDD <- rk.XML.convert(
  sources=list(state=HDD, state=HDDChar),
  mode=c(or="")
)
kRp.lgc.ld.MTLD <- rk.XML.convert(
  sources=list(
    state=MTLD,
    state=MTLDMA,
    state=MTLDChar,
    state=MTLDMAChar
  ),
  mode=c(or="")
)
# the nodes are handed over in the same order as before
kRp.logic.ld <- rk.XML.logic(
  kRp.lgc.ld.CharSteps,
  rk.XML.connect(governor=kRp.lgc.ld.CharSteps, client=stepSize, set="enabled"),
  # showTypes sets the state of LDkeepTokens and disables it
  rk.XML.connect(governor=showTypes, client=LDkeepTokens, set="state"),
  rk.XML.connect(governor=showTypes, client=LDkeepTokens, set="enabled", not=TRUE),
  kRp.lgc.ld.MATTR,
  kRp.lgc.ld.HDD,
  kRp.lgc.ld.MTLD,
  # option frames are only enabled while a related measure is checked
  rk.XML.connect(governor=MSTTR, client=optMSTTR, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.MATTR, client=optMATTR, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.HDD, client=optHDD, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.ld.MTLD, client=optMTLD, set="enabled")
)
# calculate code for the lexical diversity dialog: the generated R code calls
# lex.div() on the selected text object and assigns the result to lexical.diversity.obj
kRp.ld.js.calc <- rk.paste.JS(
# one JS variable per measure checkbox, defined with var.prefix="value"
TTR.val <- rk.JS.vars(TTR, var.prefix="value"),
MSTTR.val <- rk.JS.vars(MSTTR, var.prefix="value"),
MATTR.val <- rk.JS.vars(MATTR, var.prefix="value"),
Cld.val <- rk.JS.vars(Cld, var.prefix="value"),
Rld.val <- rk.JS.vars(Rld, var.prefix="value"),
CTTR.val <- rk.JS.vars(CTTR, var.prefix="value"),
Uld.val <- rk.JS.vars(Uld, var.prefix="value"),
Sld.val <- rk.JS.vars(Sld, var.prefix="value"),
Kld.val <- rk.JS.vars(Kld, var.prefix="value"),
Maas.val <- rk.JS.vars(Maas, var.prefix="value"),
HDD.val <- rk.JS.vars(HDD, var.prefix="value"),
MTLD.val <- rk.JS.vars(MTLD, var.prefix="value"),
MTLDMA.val <- rk.JS.vars(MTLDMA, var.prefix="value"),
# JS array "measure", built from the measure variables above
kRp.ld.measure.array <- rk.JS.array("measure", variables=list(
id(TTR.val),
id(MSTTR.val),
id(MATTR.val),
id(Cld.val),
id(Rld.val),
id(CTTR.val),
id(Uld.val),
id(Sld.val),
id(Kld.val),
id(Maas.val),
id(HDD.val),
id(MTLD.val),
id(MTLDMA.val)
), quote=TRUE, opt.sep=",\\n\\t"),
# the same for the checkboxes of the "Characteristics" tab
TTRChar.val <- rk.JS.vars(TTRChar, var.prefix="value"),
MATTRChar.val <- rk.JS.vars(MATTRChar, var.prefix="value"),
CldChar.val <- rk.JS.vars(CldChar, var.prefix="value"),
RldChar.val <- rk.JS.vars(RldChar, var.prefix="value"),
CTTRChar.val <- rk.JS.vars(CTTRChar, var.prefix="value"),
UldChar.val <- rk.JS.vars(UldChar, var.prefix="value"),
SldChar.val <- rk.JS.vars(SldChar, var.prefix="value"),
KldChar.val <- rk.JS.vars(KldChar, var.prefix="value"),
MaasChar.val <- rk.JS.vars(MaasChar, var.prefix="value"),
HDDChar.val <- rk.JS.vars(HDDChar, var.prefix="value"),
MTLDChar.val <- rk.JS.vars(MTLDChar, var.prefix="value"),
MTLDMAChar.val <- rk.JS.vars(MTLDMAChar, var.prefix="value"),
# JS array "char", built from the characteristics variables above
kRp.ld.char.array <- rk.JS.array("char", variables=list(
id(TTRChar.val),
id(MATTRChar.val),
id(CldChar.val),
id(RldChar.val),
id(CTTRChar.val),
id(UldChar.val),
id(SldChar.val),
id(KldChar.val),
id(MaasChar.val),
id(HDDChar.val),
id(MTLDChar.val),
id(MTLDMAChar.val)
), quote=TRUE, opt.sep=",\\n\\t"),
# "enabled" property of the option frames, used to skip options of disabled frames
kRp.ld.status.opt.MATTR <- rk.JS.vars(optMATTR, modifiers="enabled"),
kRp.ld.status.opt.MSTTR <- rk.JS.vars(optMSTTR, modifiers="enabled"),
kRp.ld.status.opt.HDD <- rk.JS.vars(optHDD, modifiers="enabled"),
kRp.ld.status.opt.MTLD <- rk.JS.vars(optMTLD, modifiers="enabled"),
echo("lexical.diversity.obj <- lex.div(\n\t",
varHyphenTagged
),
# numeric options are only written to the call if they differ from the initial
# value of their spinbox and the related option frame is enabled
js(
if(LDsegment != 100 && kRp.ld.status.opt.MSTTR){
echo(",\n\tsegment=", LDsegment)
} else {},
if(kRp.ld.status.opt.MTLD){
if(LDfactorSize != 0.72){
echo(",\n\tfactor.size=", LDfactorSize)
} else {}
if(LDminTokens != 9){
echo(",\n\tmin.tokens=", LDminTokens)
} else {}
} else {},
if(LDsampleSize != 42 && kRp.ld.status.opt.HDD){
echo(",\n\trand.sample=", LDsampleSize)
} else {},
if(LDwindow != 100 && kRp.ld.status.opt.MATTR){
echo(",\n\twindow=", LDwindow)
} else {}
),
tf(LDcaseSens, opt="case.sens", level=2),
tf(LDlemmatize, opt="lemmatize", level=2),
# char=NULL is written if the "char" array is empty; char.steps is only
# written together with a non-empty "char" array
js(
if(LDdetails && kRp.ld.status.opt.MTLD){
echo(",\n\tdetailed=TRUE")
} else {},
echo(kRp.ld.measure.array),
if(kRp.ld.char.array != ""){
echo(kRp.ld.char.array)
if(stepSize != 5){
echo(",\n\tchar.steps=", stepSize)
} else {}
} else {
echo(",\n\tchar=NULL")
},
if(LDlog != 10){
echo(",\n\tlog.base=", LDlog)
} else {}
),
tf(LDkeepTokens, opt="keep.tokens", level=2),
echo(",\n\tquiet=TRUE\n)\n\n")
)
# printout code: a header listing the option values, followed by
# summary(lexical.diversity.obj); the identified types are only printed if
# showTypes is checked
kRp.ld.js.print <- rk.paste.JS(
rk.JS.vars(
LDsegment,
LDfactorSize,
LDminTokens,
LDsampleSize,
LDwindow,
LDcaseSens,
LDlemmatize,
LDdetails,
showTypes
),
rk.JS.header("Lexical diversity results",
add=c("MSTTR segment size", LDsegment),
add=c("MTLD factor size", LDfactorSize),
add=c("MTLD-MA min. tokens/factor", LDminTokens),
add=c("HD-D random sample size", LDsampleSize),
add=c("MATTR window size", LDwindow),
add=c("Case sensitive", LDcaseSens),
add=c("Lemmatize", LDlemmatize),
add=c("Keep MTLD/MTLD-MA details", LDdetails)
),
echo("rk.results(summary(lexical.diversity.obj))\n"),
js(
if(showTypes){
rk.JS.header("Identified types in text", level=3)
echo("rk.print(slot(lexical.diversity.obj, \"tt\")[[\"types\"]])\n")
} else {}
),
echo("\n")
)
# make a whole component of the lex.div stuff
# guess.getter and the menu hierarchy are taken from the script-wide settings,
# like in the hyphenation and readability components, so that changing them at
# the top of this script affects all dialogs alike
kRp.ld.component <- rk.plugin.component("Lexical Diversity",
  xml=list(
    logic=kRp.logic.ld,
    dialog=kRp.dialog.ld
  ),
  # the printout code writes its own header, see kRp.ld.js.print
  js=list(
    results.header=FALSE,
    calculate=kRp.ld.js.calc,
    printout=kRp.ld.js.print
  ),
  guess.getter=guess.getter,
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)
## frequency analysis
# "Corpora" tab: optionally read frequencies from a corpus database
corpusDB <- rk.XML.dropdown(
  label="Use corpus database",
  options=list(
    "none"=c(val="none", chk=TRUE),
    "Leipzig Corpora Collection (*-text.tar.gz) "=c(val="LCC"),
    "Celex (*.CD)"=c(val="celex")
  ),
  id.name="corpusDB"
)
corpDBdir <- rk.XML.browser(label="Database file", type="file", required=FALSE, id.name="corpDBdir")
CelexRunWd <- rk.XML.spinbox(label="Number of running words", min=1, real=FALSE, id.name="CelexRunWd")
saveCorpFrq <- rk.XML.saveobj(label="Keep corpus object", initial="corp.freq.obj", id.name="saveCorpFrq")
freqSourceFrame <- rk.XML.frame(
  corpusDB,
  corpDBdir,
  CelexRunWd,
  rk.XML.stretch(),
  saveCorpFrq,
  label="Frequencies from language corpora",
  id.name="freqSourceFrame"
)

# "Data" tab: statistics to calculate, result object and optional frequency object
tfidf <- rk.XML.cbox("Term frequency/inverse document frequency statistics (tf-idf)", chk=TRUE, id.name="tfidf")
freqShowTypes <- rk.XML.cbox("Show frequencies of types & token (by word class)", value="freqWC", id.name="freqShowTypes")
kRp.frq.frm.showFrqWC <- rk.XML.frame(
  tfidf,
  freqShowTypes,
  label="Descriptive statistics",
  id.name="frameFrqWCShow"
)
saveFrq <- rk.XML.saveobj(label="Keep results", initial="freq.analysis.obj", chk=TRUE, id.name="saveFrq")
varkRpFreqObj <- rk.XML.varslot(
  label="Analyse against frequency object (valid class: kRp.corp.freq)",
  source=varsHyph,
  classes="kRp.corp.freq",
  id.name="varkRpFreqObj"
)

# two tabs; a third one ("Options"=kRp.frq.col.options) is currently disabled
kRp.frq.tab.data <- rk.XML.row(
  rk.XML.col(varsHyph),
  rk.XML.col(
    varHyphenTagged,
    varkRpFreqObj,
    rk.XML.stretch(),
    kRp.frq.frm.showFrqWC,
    saveFrq
  )
)
kRp.tab.frq <- rk.XML.tabbook(
  label="Frequency Analysis",
  tabs=list(
    "Data"=kRp.frq.tab.data,
    "Corpora"=freqSourceFrame
  )
)
kRp.dialog.frq <- rk.XML.dialog(kRp.tab.frq, label="Frequency Analysis")
# logic of the frequency analysis dialog
# derived from the "available" property of the frequency object slot
kRp.lgc.frq.haveFreqObject <- rk.XML.convert(
  sources=list(available=varkRpFreqObj),
  mode=c(and="")
)
# true if a corpus database other than "none" is selected
kRp.lgc.frq.needCorpDir <- rk.XML.convert(
  sources=list(string=corpusDB),
  mode=c(notequals="none"),
  id.name="convDrpDir"
)
# true if the "available" property of the slot equals the empty string
kRp.lgc.frq.noFreqObject <- rk.XML.convert(
  sources=list(available=varkRpFreqObj),
  mode=c(equals=""),
  id.name="convnoFrqObj"
)
# combines the two properties above
kRp.lgc.frq.saveCorp <- rk.XML.convert(
  sources=list(kRp.lgc.frq.noFreqObject, kRp.lgc.frq.needCorpDir),
  mode=c(and=""),
  id.name="convSvCrp"
)
# true if the Celex database is selected
kRp.lgc.frq.Celex <- rk.XML.convert(
  sources=list(string=corpusDB),
  mode=c(equals="celex"),
  id.name="convDrpCelex"
)
# the nodes are handed over in the same order as before
kRp.logic.frq <- rk.XML.logic(
  kRp.lgc.frq.haveFreqObject,
  rk.XML.connect(governor=kRp.lgc.frq.haveFreqObject, client=freqSourceFrame, not=TRUE),
  kRp.lgc.frq.needCorpDir,
  rk.XML.connect(governor=kRp.lgc.frq.needCorpDir, client=corpDBdir, set="enabled"),
  rk.XML.connect(governor=kRp.lgc.frq.needCorpDir, client=corpDBdir, set="required"),
  rk.XML.connect(governor=kRp.lgc.frq.needCorpDir, client=saveCorpFrq, set="enabled"),
  kRp.lgc.frq.noFreqObject,
  kRp.lgc.frq.saveCorp,
  rk.XML.connect(governor=kRp.lgc.frq.saveCorp, client=saveCorpFrq, set="active"),
  kRp.lgc.frq.Celex,
  rk.XML.connect(governor=kRp.lgc.frq.Celex, client=CelexRunWd, set="enabled")
)
# calculate code for the frequency analysis dialog: if no frequency object was
# selected and a corpus database file is given, the generated R code first reads
# it into corp.freq.obj; freq.analysis() is then called on the selected text
# object and its result assigned to freq.analysis.obj
kRp.frq.js.calc <- rk.paste.JS(
js(
if(!varkRpFreqObj){
if(corpusDB == "LCC" && corpDBdir){
echo("corp.freq.obj <- read.corp.LCC(\n\t\"", corpDBdir, "\"\n)\n\n")
} else if(corpusDB == "celex" && corpDBdir){
echo("corp.freq.obj <- read.corp.celex(\n\t\"", corpDBdir, "\",\n\trunning.words=", CelexRunWd, "\n)\n\n")
} else {}
} else {},
echo("freq.analysis.obj <- freq.analysis(\n\t",
varHyphenTagged
),
if(!varkRpFreqObj){
if(corpusDB != "none"){
echo(",\n\tcorp.freq=corp.freq.obj")
} else {}
} else {
echo(",\n\tcorp.freq=", varkRpFreqObj)
},
if(!tfidf){
echo(",\n\ttfidf=FALSE")
} else {}
),
echo("\n)\n\n")
)
# printout code: a header followed by summary(freq.analysis.obj); the
# frequencies of types and token are only printed if freqShowTypes is checked
kRp.frq.js.print <- rk.paste.JS(
rk.JS.vars(freqShowTypes),
rk.JS.header("Frequency analysis results"),
echo("rk.print(summary(freq.analysis.obj))\n\n"),
js(
if(freqShowTypes){
rk.JS.header("Frequencies of types & token (by word class)", level=3)
echo("freqTypeToken <- data.frame(\n\t",
"types=slot(freq.analysis.obj, \"desc\")$freq.types,\n\t",
"token=slot(freq.analysis.obj, \"desc\")$freq.token\n)\n",
"rk.print(freqTypeToken)\n\n")
} else {}
)
)
# Combine logic, dialog and JavaScript of the frequency analysis into a component.
# guess.getter is passed on from the script-wide setting, like in the
# hyphenation and readability components, so that changing it at the top of
# this script affects all dialogs alike
kRp.frq.component <- rk.plugin.component("Frequency Analysis",
  xml=list(
    logic=kRp.logic.frq,
    dialog=kRp.dialog.frq
  ),
  # the printout code writes its own header, see kRp.frq.js.print
  js=list(
    results.header=FALSE,
    calculate=kRp.frq.js.calc,
    printout=kRp.frq.js.print
  ),
  guess.getter=guess.getter,
  hierarchy=menu.hierarchy,
  create=c("xml", "js"),
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
)
#############
## if you run the following function call, files will be written to output.dir!
#############
# this is where it gets serious: everything defined above is put together into one plugin.
# the tokenizing & POS tagging dialog is the main dialog, the other dialogs are added as components.
# the result is assigned with <<-, i.e. outside of this local() call
kRp.POS.xml <- list(
  logic=kRp.POS.lgc.sect,
  dialog=kRp.dialog.POS
)
kRp.POS.js <- list(
  results.header=FALSE,
  require="koRpus",
  preprocess=kRp.POS.js.prep,
  calculate=kRp.POS.js.calc,
  printout=kRp.POS.js.print
  # disabled: load.silencer=var.chk.suppress
)
kRp.all.components <- list(
  kRp.hyph.component,
  kRp.rdb.component,
  kRp.ld.component,
  kRp.frq.component
)
rk.kRp.dir <<- rk.plugin.skeleton(
  about.info,
  path=output.dir,
  guess.getter=guess.getter,
  xml=kRp.POS.xml,
  js=kRp.POS.js,
  pluginmap=list(name="Tokenizing & POS tagging", hierarchy=menu.hierarchy),
  components=kRp.all.components,
  dependencies=dependencies.info,
  create=c("pmap", "xml", "js"),
  overwrite=overwrite,
  tests=FALSE,
  # disabled: edit=TRUE,
  load=TRUE,
  hints=FALSE,
  gen.info="$SRC/inst/rkward/rkwarddev_koRpus_plugin_script.R"
  # disabled: show=TRUE
)

# refresh the translatable messages of the generated pluginmap
kRp.pluginmap.file <- file.path(output.dir, "koRpus", "inst", "rkward", "koRpus.pluginmap")
if(isTRUE(update.translations)){
  rk.updatePluginMessages(kRp.pluginmap.file)
} else {}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.