Nothing
### - - - - - - - - - - - - - - - - - -
### weightings.r
### - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - -
# local weightings
# what the hell ;)
lw_tf <- function(m) {
return(m)
}
# log'ed termfrequency
lw_logtf <- function(m) {
return( log(m+1) )
}
# binary termfrequency
lw_bintf <- function(m) {
return( (m>0)*1 )
}
# - - - - - - - - - - - -
# global weightings:
# Dumais (1992), same in Nakov (2001)
# normalisation
gw_normalisation <- function(m) {
return ( 1 / sqrt( rowSums((m*m), na.rm = TRUE) ) )
}
# inverse document frequency
# from Dumais (1992), Nakov (2001) uses log not log2
gw_idf <- function(m) {
df = rowSums(lw_bintf(m), na.rm=TRUE)
return ( ( log2(ncol(m)/df) + 1 ) )
}
# global frequency * inverse document frequency
# from Nakov (2001)
gw_gfidf <- function(m) {
gf = rowSums(m, na.rm = TRUE)
df = rowSums(lw_bintf(m), na.rm=TRUE)
return ( gf/df )
}
# real entropy from Shannon (1948)
entropy <- function (m) {
gf = rowSums(m, na.rm = TRUE)
p = m / gf
ndocs = ncol(m)
# shannon resp. turing (there: "weight of evidence")
# exception:
# iff p=0: 0*log(0) = 0
# this is solved by rowSums(..., na.rm=TRUE)
entropy = - rowSums( (p*log(p)) / log(ndocs), na.rm = TRUE )
return ( entropy )
}
# entropy as in Dumais(1992), Nakov(2001):
# global weighting = 1 + entropy
gw_entropy <- function(m) {
return ( (1 + entropy(m)) )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.