#' Simple eigenSentence estimation function.
#'
#' Applies a function to a matrix representation of a sentence to get an
#' eigensentence map.
#'
#'
#' @param wordembed the words and their vector embedding - a data frame with n
#' basis length columns
#' @param functiontoapply e.g. mean, max, min, a custom function --- needs to
#' work with apply
#' @param normalize normalize the magnitude of the eigsentences to be 1
#' @param eigsentbasislength less than or equal to the full basis length (100)
#' @return eigensentence matrix is output
#' @author Avants BB
#' @examples
#'
#' data(reuters_words,package="RKRNS")
#' esent<-eigenSentences( reuters_words )
#'
eigenSentences <- function( wordembed, functiontoapply = sum , normalize=F, eigsentbasislength = NA, sentencesIn=NA, eventdata=NA ) {
######## build the average eigenword for each sentence #########
if ( is.na(sentencesIn) )
{
data(sentences,package="RKRNS")
sentencesIn<-sentences
}
words<-wordembed$WhichWord
nwords<-length(words)
nsentencesIn<-length(sentencesIn$Sentence)
if ( is.na(eigsentbasislength) ) eigsentbasislength<-ncol( wordembed ) - 1
if ( eigsentbasislength > (ncol( wordembed ) - 1) ) eigsentbasislength<-ncol( wordembed ) - 1
sclfactor<-1
if ( typeof(functiontoapply) == "character" ) if ( functiontoapply == "svd" ) sclfactor<-2
if ( typeof(functiontoapply) == "character" ) if ( functiontoapply == "nvn" ) sclfactor<-3
data(eigen_sentences, package = "RKRNS")
eigsent<-matrix( rep( 0, (nsentencesIn) * eigsentbasislength * sclfactor ) , nrow=nsentencesIn )
rownames( eigsent )<-sentencesIn$Sentence
sentct<-0 # should = nsentencesIn
for ( i in 1:nsentencesIn )
{
locwords<-unlist(strsplit(as.character(sentencesIn$Sentence[i]),"\\."))
locwords<-words[ words %in% locwords ]
if ( length(locwords) > 0 )
{
sentct<-sentct+1
sentmat<-matrix( rep( 0, length(locwords) * eigsentbasislength ) , nrow=length(locwords) )
for ( j in 1:length(locwords) )
{
whichword2<-wordembed$WhichWord == locwords[j]
wordvec <- wordembed[whichword2,2:(2+eigsentbasislength-1)]
wvec<-as.numeric( wordvec )
sentmat[j,]<-wvec
}
sentmat<-antsrimpute( sentmat )
if ( typeof(functiontoapply) == "character" )
{
if ( functiontoapply == "svd" )
{
pj<-svd(sentmat)$u %*% sentmat
if ( dim(pj)[1] == 1 ) pj<-rbind(pj,pj)
eigsent[sentct,]<-as.numeric(pj[1:2,])
}
if ( functiontoapply == "joao" )
{
whichesent<-which( eigen_sentences$Sentence == sentencesIn$Sentence[i])
esent<-eigen_sentences[ whichesent, 4:(4+eigsentbasislength-1) ]
eigsent[sentct,]<-as.numeric(esent)
}
if ( functiontoapply == "nvn" )
{
ww<-which( eventdata$sentences == sentencesIn$Sentence[i] )[1]
whichword2<-as.character(wordembed$WhichWord)== as.character(eventdata$enouns1[ww])
if ( !is.na(whichword2) ) wordvec1 <- wordembed[whichword2,2:(2+eigsentbasislength-1)] else wordvec1<-rep(NA,eigsentbasislength)
whichword2<-as.character(wordembed$WhichWord)== as.character(eventdata$everbs1[ww])
if ( (sum(whichword2) > 0 ) & !is.na(whichword2) )
wordvec2 <- wordembed[whichword2,2:(2+eigsentbasislength-1)] else wordvec2<-rep(NA,eigsentbasislength)
whichword2<-as.character(wordembed$WhichWord)== as.character(eventdata$enouns2[ww])
if ( !is.na(whichword2) ) wordvec3 <- wordembed[whichword2,2:(2+eigsentbasislength-1)] else wordvec3<-rep(NA,eigsentbasislength)
eigsent[sentct,]<-c(as.numeric(wordvec1),as.numeric(wordvec2),as.numeric(wordvec3))
}
}
else
{
eigsent[sentct,]<-apply(sentmat,FUN=functiontoapply,MARGIN=2)
}
}
}
eigsent<-antsrimpute(eigsent)
if ( normalize )
{
eigsentmag<-sqrt( rowSums(eigsent * eigsent) )
eigsent<-eigsent/eigsentmag
}
return( eigsent )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.