Nothing
## ---- include = FALSE---------------------------------------------------------
# Global chunk options applied to every chunk of the rendered document.
knitr::opts_chunk$set(
  # Text output: fold source and output together, prefix output with "#>"
  collapse = TRUE,
  comment = "#>",
  # Figures: centered, width 7 by height 4
  fig.align = "center",
  fig.width = 7,
  fig.height = 4
)
## ----setup, include = FALSE---------------------------------------------------
library(fplyr)
## ----store_path---------------------------------------------------------------
# Locate the example data set shipped with the package. `mustWork = TRUE`
# makes a missing file fail right here with a clear message; without it
# system.file() silently returns "" and the error only surfaces later, in
# whatever function first tries to read the path.
f <- system.file("extdata", "dt_iris.csv", package = "fplyr", mustWork = TRUE)
# Let's have a look at the first four lines of the file
fread(f, nrows = 4)
## ----flply_summary------------------------------------------------------------
# Apply summary() to each block of the file and collect the results in a list.
species_summ <- flply(
  input = f,
  FUN = summary
)
# `species_summ` is a list of three elements; show the one for 'versicolor'
species_summ[["versicolor"]]
## ----flply_hclust-------------------------------------------------------------
# For each species, run a hierarchical clustering on every field except the
# first and collect the resulting objects in a list.
clusters <- flply(f, FUN = function(d) {
dm <- dist(d[, -1]) # Compute the distance matrix, excluding the first field
hclust(dm) # Perform the clustering and return the object
})
# The `clusters` variable contains one "hclust" object for each species.
# Let's plot the 'setosa' dendrogram
plot(clusters$setosa)
## ----flply_kmeans-------------------------------------------------------------
# Run k-means on one block of data.
#   d          : the block; its first column is dropped before clustering
#   my_centers : forwarded to kmeans() as `centers`
# Returns the object produced by kmeans().
kmeans_FUN <- function(d, my_centers) {
  features <- d[, -1]
  kmeans(x = features, centers = my_centers)
}
# First run: two clusters per species. The extra argument given after FUN is
# handed over by flply() to kmeans_FUN as `my_centers`.
my_centers <- 2
clusters <- flply(f, FUN = kmeans_FUN, my_centers)
# Centers of the clusters found for 'setosa'
clusters[["setosa"]][["centers"]]
# Second run: same analysis, this time with three clusters per species
my_centers <- 3
clusters <- flply(f, FUN = kmeans_FUN, my_centers)
clusters[["setosa"]][["centers"]]
## ----flply_select-------------------------------------------------------------
# Pull the second column out of every block as a plain vector.
sepal_length <- flply(f, FUN = function(d) d[[2]])
# `sepal_length` now holds all the sepal lengths, split by species
sepal_length
## ----ftply_by-----------------------------------------------------------------
# Keep every block except the one whose key is "setosa": returning NULL for a
# block leaves it out of the result.
selected_flowers <- ftply(f, function(d, by) {
  if (by != "setosa") {
    return(d)
  }
  NULL
})
# Let's have a look at the first few lines of the output; note that it starts
# directly with 'versicolor', because all the 'setosa' flowers have been omitted
head(selected_flowers, n = 4)
## ----ftply_firstfield---------------------------------------------------------
# Report how many columns a block has.
#   d  : the block of data
#   by : accepted for the (d, by) calling convention, but not used here
# Returns the second element of dim(d), i.e. the column count.
count_cols <- function(d, by) {
  dim(d)[2L]
}
# Count the columns that each block has when it reaches the callback
ftply(f, FUN = count_cols)
## ----ftply_head---------------------------------------------------------------
# Read only the first block of the file (no FUN given, so the default is used)
flowers_head <- ftply(
  f,
  nblocks = 1
)
# `flowers_head` has 50 observations, while the original data set had 150.
# Let's have a look at the first ones.
head(flowers_head, n = 4)
## ----ftply_parallel-----------------------------------------------------------
# Draw a random sample of (up to) 10 rows from every block, asking ftply() to
# work with `parallel = 3`.
result <- ftply(f, parallel = 3, FUN = function(d, by) {
  n_rows <- nrow(d)
  # sample.int() sidesteps the `1:nrow(d)` pitfall (which yields c(1, 0) for an
  # empty block); min() keeps the call valid for blocks with fewer than 10 rows
  # instead of failing with "cannot take a sample larger than the population".
  d[sample.int(n_rows, min(10L, n_rows)), ]
})
# Let's check that the output has 30 rows (10 for each species)
nrow(result)
## ----ffply--------------------------------------------------------------------
# Temporary file that will receive the output
out <- tempfile()
ffply(f, out, function(d, by) {
  # Here `d` comes without the subject IDs; they are added back automatically
  # afterwards, so the whole block can go straight into prcomp()
  pca <- prcomp(d)
  as.data.table(pca$x)
})
# Let's check the result. Note in particular that the subject IDs are present
fread(out, nrows = 4)
## ----fmply_paths--------------------------------------------------------------
# Two temporary output paths. The vector need not be named; the names are
# used here just for convenience.
out <- c(
  pca = tempfile(),
  transf = tempfile()
)
# Analyze one block of data and return two tables.
#   d : the block; its first column holds the subject IDs
# Returns an unnamed list of length two:
#   1. the IDs bound to the principal-component scores of the other columns
#   2. the IDs bound to the other columns, each number 'z' turned into e^(-z)
analyze_block <- function(d) {
  # Everything but the ID column takes part in the computations
  features <- d[, -1]
  # The IDs are put back by hand in front of each result
  scores <- cbind(d[, 1], prcomp(features)$x)
  transformed <- cbind(d[, 1], exp(-features))
  list(scores, transformed)
}
# Run analyze_block over the file. `out` holds two paths and analyze_block
# returns a list of two tables; presumably they are matched by position
# (first table -> first path) -- TODO confirm against the fmply() documentation.
fmply(f, out, analyze_block)
## ----fmply_list---------------------------------------------------------------
# Analyze one block of data; like the two-table version, plus the fitted PCA.
#   d : the block; its first column holds the IDs
# Returns an unnamed list of length three:
#   1. the IDs bound to the principal-component scores of the other columns
#   2. the IDs bound to the other columns, each number 'z' turned into e^(-z)
#   3. the "prcomp" object itself
analyze_block2 <- function(d) {
  pca <- prcomp(d[, -1])
  list(
    cbind(d[, 1], pca$x),
    cbind(d[, 1], exp(-d[, -1])),
    pca
  )
}
# `out` still holds two paths, while analyze_block2 returns three elements.
# The value assigned to `iris_pca` is indexed by species name below, so it
# presumably collects the extra (third) element of each block -- TODO confirm
# against the fmply() documentation.
iris_pca <- fmply(f, out, analyze_block2)
# Let's have a look at the screeplot of the 'versicolor' PCA
screeplot(iris_pca$versicolor)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.