inst/doc/rscc.R

## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options for the vignette: `collapse` and `comment`
# control how source and printed output are rendered (see the knitr
# chunk-option reference); "#>" is the prefix used for output lines.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup, include=FALSE-----------------------------------------------------
library("rscc")

## ---- eval=FALSE--------------------------------------------------------------
#  files <- ... # get file names from somewhere, e.g. list.files()
#  prgs  <- sourcecode(files, title=basename(files))
#  docs  <- documents(prgs, type="names")
#  sim   <- similarities(docs)  # you may use alternatively tfidf()
#  dfsim <- matrix2dataframe(sim)
#  head(dfsim, n=25)
#  browse(prgs, dfsim, n=6)     # creates and opens a HTML file

## ---- eval=FALSE--------------------------------------------------------------
#  files <- ... # get file names from somewhere, e.g. list.files()
#  # load all expressions with at least `minlines` lines
#  prgs  <- sourcecode(files, title=basename(files), minlines=0)
#  docs  <- documents(prgs, type="names")
#  sim   <- similarities(docs)  # you may use alternatively tfidf()
#  sim   <- same_file(sim)      # do not compare expressions within one file
#  dfsim <- matrix2dataframe(sim)
#  head(dfsim, n=25)
#  browse(prgs, dfsim, n=6)     # creates and opens a HTML file

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code, passing each file's base name as its title.
prgs <- sourcecode(files, title = basename(files))
# Show the names of the loaded entries.
names(prgs)

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load expressions with at least `minlines` (here 3) lines.
# NOTE(review): `silent = TRUE` presumably suppresses sourcecode()'s
# progress/diagnostic output -- confirm against ?sourcecode.
prgs <- sourcecode(
  files,
  title = basename(files),
  minlines = 3,
  silent = TRUE
)
# Show the names of the loaded entries.
names(prgs)

## -----------------------------------------------------------------------------
# Build the documents from the loaded programs, then print the top-left
# 8 x 8 corner of the term document frequency table.
docs <- documents(prgs)
freq_table(docs)[seq_len(8), seq_len(8)]

## -----------------------------------------------------------------------------
# Show the first loaded program and the three kinds of name sets that can be
# extracted from it (these correspond to the `type` values "v", "n" and "f").
cat(as.character(prgs[[1L]]))  # the source code itself
all.vars(prgs[[1L]])           # variables only: type="v", the default
all.names(prgs[[1L]])          # every name: type="n"
# names that are not variables: type="f"
setdiff(all.names(prgs[[1L]]), all.vars(prgs[[1L]]))

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code and turn it into documents.
prgs <- sourcecode(files, title = basename(files), silent = TRUE)
docs <- documents(prgs)
# Print the top-left 8 x 8 corner of the similarity matrix computed with
# the default coefficient.
similarities(docs)[1:8, 1:8]

## ---- eval=FALSE--------------------------------------------------------------
#  inset1 <- setfull %in% unique(set1)
#  inset2 <- setfull %in% unique(set2)
#  p      <- length(setfull)
#  n11    <- sum(inset1 & inset2)
#  n10    <- sum(inset1 & !inset2)
#  n01    <- sum(!inset1 & inset2)
#  n00    <- sum(!inset1 & !inset2)

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code and turn it into documents.
prgs <- sourcecode(files, title = basename(files), silent = TRUE)
docs <- documents(prgs)
# Print the top-left 8 x 8 corner of the similarity matrix for the
# coefficient selected by coeff = "m" (see ?similarities for its definition).
similarities(docs, coeff = "m")[1:8, 1:8]

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code and turn it into documents.
prgs <- sourcecode(files, title = basename(files), silent = TRUE)
docs <- documents(prgs)
# Print the top-left 8 x 8 corner of the tfidf() result, the alternative
# to similarities().
tfidf(docs)[1:8, 1:8]

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code and turn it into documents.
prgs <- sourcecode(files, title = basename(files), silent = TRUE)
docs <- documents(prgs)
# Compute the similarity matrix for coeff = "m" and convert it to a data
# frame; `simdf` is reused by the plotting chunk that follows.
simm  <- similarities(docs, coeff = "m")
simdf <- matrix2dataframe(simm)
# Show the first ten rows.
head(simdf, 10)

## ---- fig.height=3------------------------------------------------------------
# Jittered strip chart of the third column of `simdf`, with the column name
# as x-axis label.
stripchart(
  simdf[, 3],
  method = "jitter",
  pch = 19,
  xlab = names(simdf)[3]
)

## -----------------------------------------------------------------------------
library("igraph")
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load the source code and build documents from all names (type = "n").
# NOTE(review): `minlen = 3` presumably drops names shorter than three
# characters -- confirm against ?documents.
prgs  <- sourcecode(files, title = basename(files), silent = TRUE)
docs  <- documents(prgs, type = "n", minlen = 3)
simm  <- similarities(docs)
# Convert the similarity matrix into an igraph graph.
# NOTE(review): diag = FALSE presumably leaves out the diagonal
# (self-similarity) entries -- confirm against ?as_igraph.
graph <- as_igraph(simm, diag = FALSE)
# Color all edges with a large similarity coefficient (> 0.4) in red,
# the rest in grey.
E(graph)$color <- ifelse(E(graph)$weight > 0.4, "red", "grey")
# Edge width grows linearly with the edge weight.
plot(graph, edge.width = 1 + 3 * E(graph)$weight)
box()

## -----------------------------------------------------------------------------
# Locate the example scripts in the "examples" directory of the installed
# rscc package. `pattern` is a regular expression, not a shell glob, so the
# dot must be escaped: "\\.R$" matches only names ending in a literal ".R".
files <- list.files(
  system.file("examples", package = "rscc"),
  pattern = "\\.R$",
  full.names = TRUE
)
# Load expressions with at least `minlines` (here 1) lines, so a single
# file can contribute several entries.
prgs  <- sourcecode(
  files,
  title = basename(files),
  silent = TRUE,
  minlines = 1
)
docs  <- documents(prgs)
simm  <- similarities(docs)
# Top-left 3 x 3 corner before the same-file adjustment ...
simm[1:3, 1:3]
# ... and after same_file(), which is used so that expressions within one
# file are not compared with each other.
simm  <- same_file(simm)
simm[1:3, 1:3]

## ---- eval=FALSE--------------------------------------------------------------
#  files <- list.files(system.file("examples", package="rscc"), "*.R$", full.names = TRUE)
#  prgs  <- sourcecode(files, title=basename(files), silent=TRUE)
#  docs  <- documents(prgs, type="n", minlen=3)
#  simdf <- matrix2dataframe(similarities(docs))
#  if (interactive()) browse(prgs, simdf, simdf[,3]>0.4)

Try the rscc package in your browser

Any scripts or data that you put into this service are public.

rscc documentation built on Jan. 20, 2022, 5:08 p.m.