# R/exampleSearchSimple.R

################################################################################
# Author: Joshua Heipel <[email protected]>
################################################################################
 
# CONNECT TO TAMBORA PROJECT ###################################################

# needs devtools to install via github
# if (!require("devtools")) install.packages("devtools")
# library("devtools")
# devtools::install_github("tambora-org/tamboRapi")
# devtools::install_github("joshua-heipel/textmining")

# SET LOCAL DIRECTORY OF R-SOURCEFILES #########################################
# TODO: install the GitHub repository as a package for a generic way to access
# the text mining functions

# Set local working directory
# NOTE(review): this path is machine-specific; adjust it to the local copy of
# the text mining sources before running the script.
setwd("C:/Users/Josh/Dropbox/HiwiJob/Tambora/R/textmining")

# Load the R files for text mining.
# The pattern is a regular expression, so it is escaped and anchored to match
# only file names ending in ".R"; an unanchored ".R" also matches names such
# as "notes.Rmd" or "project.Rproj", which cannot be sourced.
# lapply() is used purely for the side effect of source(); its result is
# discarded.
invisible(lapply(list.files(".", pattern = "\\.R$"), source))

# READ DATA FROM TAMBORA #######################################################

# fetch the data of Tambora project 1853
data <- tamboRapi::fromTambora("g[pid]=1853")

# keywords the search query is built from
querywords <- c(
  "Frost",
  "minus",
  "Temperatur",
  "kalt",
  "eisig"
)

# MAIN PROGRAM #################################################################

# split documents into sentences (by punctuation)
sentences <- SplitDocs(data$text)

# create vector space model from all sentences
# (TRUE/FALSE are spelled out: T and F are ordinary variables that any of the
# sourced files could have reassigned)
vsm <- CreateVSM(unlist(sentences), language = "de", stemming = FALSE)

# create query vector over the terms (row names) of the vector space model
q <- CreateQuery(querywords, row.names(vsm), stemming = FALSE)

# search for sentences with semantic similarities to the given set of keywords
# NOTE(review): spec = "nnn" is presumably the term weighting scheme and
# dims = 10 the number of latent dimensions -- confirm against
# LatentSemanticSearch()
ranks <- LatentSemanticSearch(vsm, q, spec = "nnn", dims = 10)

# View the (up to) 10 sentences most similar to the given keywords.
# head() returns all rows when fewer than 10 sentences exist, where indexing
# with [1:10, ] would fail with "subscript out of bounds"; drop = FALSE keeps
# the matrix shape even if only a single row is present.
results <- cbind(unlist(sentences), round(ranks, 2))
View(head(results[order(ranks, decreasing = TRUE), , drop = FALSE], 10))
# joshua-heipel/textmining documentation built on Aug. 2, 2017, 12:03 a.m.