inst/doc/Introduction_to_DTwrappers_Updated.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  tidy = TRUE
)

## ----setup--------------------------------------------------------------------
library(DTwrappers)
data(iris)
n <- nrow(iris)
dt.name <- "dat"

RNGversion(vstr = 3.6)
set.seed(seed = 921)

# Randomizes the order of the rows.
dat <- data.table::as.data.table(x = iris[sample(x = 1:n, size = n, replace = F),])
head(dat)

## ----display first 3 rows of iris dataset-------------------------------------
dt.choose.rows(dt.name = "dat", the.filter = 1:3)

## ----display first 3 rows of iris dataset with filter-------------------------
dt.choose.rows(dt.name = "dat", the.filter = "1:3")

## ----display first 3 rows of iris dataset with code---------------------------
dt.choose.rows(dt.name = "dat", the.filter = "1:3", return.as = "code")

## ----display first 3 rows of iris dataset with code and filter----------------
dt.choose.rows(dt.name = "dat", the.filter = "1:3", return.as = "all")

## ----chose rows when the sepal length is less than 4.4------------------------
dt.choose.rows(dt.name = "dat", the.filter = "Sepal.Length < 4.4", return.as = "all")

## ----choose rows with filter as expression------------------------------------
dt.choose.rows(dt.name = "dat", the.filter = expression(Sepal.Length < 4.4), return.as = "all")

## ----choose rows more complex filtering---------------------------------------
dt.choose.rows(dt.name = "dat", the.filter = "Sepal.Width >= 3 & Sepal.Length < 4.8 & Species == 'setosa'", return.as = "all")

## ----chose all columns for iris with first 5 entries--------------------------
dt.choose.cols(dt.name = "dat", the.variables = ".", the.filter = "1:5", return.as = "all")

## ----chose species and sepal length for first 3 entries-----------------------
dt.choose.cols(dt.name = "dat", the.variables = c("Species", "Sepal.Length"), the.filter = "1:3", return.as = "all")

## ----chose first 2 rows of data for each species and print sepal length and sepal width for the flowers----
dt.choose.cols(dt.name = "dat", the.variables = c("Sepal.Length", "Sepal.Width"), grouping.variables = "Species", first.k = 2, return.as = "all")

## ----first2_per_group---------------------------------------------------------
dt.first.k.rows(dt.name = "dat", k = 2, the.variables = c("Sepal.Length", "Sepal.Width"), grouping.variables = "Species", return.as = "all")

## ----chose last 2 rows of data for each species and print sepal length and sepal width for the flowers----
dt.choose.cols(dt.name = "dat", the.variables = c("Sepal.Length", "Sepal.Width"), grouping.variables = "Species", last.k = 2, return.as = "all")

## ----dt.last.k.rows-----------------------------------------------------------
dt.last.k.rows(dt.name = "dat", k = 2, the.variables = c("Sepal.Length", "Sepal.Width"), grouping.variables = "Species", return.as = "all")

## ----row indices--------------------------------------------------------------
dt.choose.cols(dt.name = "dat", the.variables = c("Sepal.Length", "Sepal.Width"), grouping.variables = "Species", row.indices = c(3,5,7), return.as = "all")

## ----Count the number of rows in the dataset iris-----------------------------
dt.count.rows(dt.name = "dat", return.as = "all")

## ----Count number of rows where species is "Setosa"---------------------------
dt.count.rows(dt.name = "dat", the.filter = "Species == 'setosa'", return.as = "all")

## ----Count in subgroups-------------------------------------------------------
dt.count.rows(dt.name = "dat", the.filter = "Petal.Length > 1.7", grouping.variables = "Species", return.as = "all")

## ----Count in subgroups with new name-----------------------------------------
dt.count.rows(dt.name = "dat", the.filter = "Petal.Length > 1.7", grouping.variables = "Species", count.name = "Total Qualifying Rows", return.as = "all")

## ----define category as a new column with iris as its value-------------------
dt.define.variable(dt.name = "dat", variable.name = "Category", the.values = "Flower: Iris", return.as = "all", specification = "by.value")

## ----define max sepal length species variable---------------------------------
dt.define.variable(dt.name = "dat", variable.name = "Max_Sepal_Length_Species", the.values = "max(Sepal.Length)", specification = "by.expression", grouping.variables = "Species", sortby.group = TRUE, return.as = "all")

## ----define.variable.in.subset------------------------------------------------
dt.define.variable(dt.name = "dat", variable.name = "setosa_sl_below_5", the.values = "Sepal.Length < 5", specification = "by.expression", the.filter = "Species == 'setosa'", return.as = "all")

## ----removing category as a variable------------------------------------------
dt.remove.variables(dt.name = "dat", the.variables = c("Category", "setosa_sl_below_5"), return.as = "all")

## ----sort species and sepal length in increasing order------------------------
dt.sort(dt.name = "dat", sorting.variables = c("Species", "Sepal.Length"), sort.increasing = TRUE, return.as = "all")

## ----sort species and sepal length in decreasing order------------------------
dt.sort(dt.name = "dat", sorting.variables = c("Species", "Sepal.Length"), sort.increasing = FALSE, return.as = "all")

## ----species in increasing order and sepal length in decreasing order---------
dt.sort(dt.name = "dat", sorting.variables = c("Species", "Sepal.Length"), sort.increasing = c(T, F), return.as = "all")

## ----calculate_one_fn_one_variable--------------------------------------------
dt.calculate(dt.name = "dat", the.functions = "mean", the.variables = "Sepal.Length", return.as = "all")

## ----calculate_multiple_fn----------------------------------------------------
dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = "Sepal.Length", return.as = "all")

## ----dt.calculate.parameters--------------------------------------------------
dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = "Sepal.Length", other.params = "na.rm = T", return.as = "all")

## ----dt.calculate.filter.group------------------------------------------------
dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = "Sepal.Length", the.filter = "Sepal.Length > 3.5", grouping.variables = "Species", return.as = "all")

## ----dt.calculate.filter.group.mult.variables---------------------------------
dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"), the.filter = "Sepal.Length > 3.5", grouping.variables = "Species", return.as = "all")

## ----dt.calculate.filter.group.mult.variables.wide----------------------------
#dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"), the.filter = "Sepal.Length > 3.5", grouping.variables = "Species", table.format = "wide", return.as = "all")

## ----adding triple mean as a function in dt.calculate-------------------------
`triple mean` <- function(x, na.rm = T){
  return(3 * mean(x = x, na.rm = na.rm))
}

dt.calculate(dt.name = "dat", the.variables = c("Sepal.Length", "Sepal.Width"), the.functions = c("mean", "sd", "triple mean"), grouping.variables = "Species", table.format = "long", return.as = "all")

## ----rowSums------------------------------------------------------------------
dt.calculate(dt.name = "dat", the.functions = "rowSums", the.variables = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"), the.filter = 1:5, individual.variables = FALSE, return.as = "all")

## ----get.lm.coefs-------------------------------------------------------------
get.lm.coefs <- function(data, formula){
  require(data.table)
  mod <- lm(formula = formula, data = data)
  the.coefs <- as.data.table(x = summary(mod)$coefficients, keep.rownames = TRUE)
  setnames(x = the.coefs, old = "rn", new = "Variable")
  return(the.coefs)
}

## ----linear regression using dt.calculate-------------------------------------
## linear regression 
dt.calculate(dt.name = "dat", the.functions = "get.lm.coefs", grouping.variables = "Species", other.params = "formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width", return.as = "all", individual.variables = F, add.function.name = F)

Try the DTwrappers package in your browser

Any scripts or data that you put into this service are public.

DTwrappers documentation built on June 21, 2021, 9:06 a.m.