# Introduction_to_DTwrappers_Updated.R
# In DTwrappers: Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

## ---- include = FALSE---------------------------------------------------------
# Set the knitr chunk options used for the rest of the document:
# `collapse`, the output prefix `comment`, and `tidy`.
do.call(
  what = knitr::opts_chunk$set,
  args = list(collapse = TRUE, comment = "#>", tidy = TRUE)
)

## ----setup--------------------------------------------------------------------
library(DTwrappers)
# Load the built-in iris data and record how many rows it has.
data(iris)
n <- nrow(iris)

# Name of the working data.table, stored as a string.
dt.name <- "dat"

# Pin the random number generator so the shuffle below is reproducible.
# The version is given as a string: a double such as 3.10 would silently
# collapse to "3.1" when converted to text.
RNGversion(vstr = "3.6.0")
set.seed(seed = 921)

# Randomizes the order of the rows.
# seq_len(n) is used instead of 1:n so that an empty table yields an empty
# index rather than c(1, 0).
dat <- data.table::as.data.table(
  x = iris[sample(x = seq_len(n), size = n, replace = FALSE), ]
)
head(dat)

## ----display first 3 rows of iris dataset-------------------------------------
# First three rows, with the filter supplied as a numeric vector.
dt.choose.rows(
  dt.name = "dat",
  the.filter = 1:3
)

## ----display first 3 rows of iris dataset with filter-------------------------
# Same call, with the filter written as a character string.
dt.choose.rows(
  dt.name = "dat",
  the.filter = "1:3"
)

## ----display first 3 rows of iris dataset with code---------------------------
# return.as = "code": per the chunk label this shows the code form of the
# call (exact return shape is defined by DTwrappers -- confirm in its docs).
dt.choose.rows(
  dt.name = "dat",
  the.filter = "1:3",
  return.as = "code"
)

## ----display first 3 rows of iris dataset with code and filter----------------
# return.as = "all" is used for the remaining examples in this script.
dt.choose.rows(
  dt.name = "dat",
  the.filter = "1:3",
  return.as = "all"
)

## ----chose rows when the sepal length is less than 4.4------------------------
# Logical condition on a column, written as a string.
dt.choose.rows(
  dt.name = "dat",
  the.filter = "Sepal.Length < 4.4",
  return.as = "all"
)

## ----choose rows with filter as expression------------------------------------
# The same condition supplied as an R expression object instead of a string.
dt.choose.rows(
  dt.name = "dat",
  the.filter = expression(Sepal.Length < 4.4),
  return.as = "all"
)

## ----choose rows more complex filtering---------------------------------------
# Several conditions combined with the elementwise `&` operator.
dt.choose.rows(
  dt.name = "dat",
  the.filter = "Sepal.Width >= 3 & Sepal.Length < 4.8 & Species == 'setosa'",
  return.as = "all"
)

## ----chose all columns for iris with first 5 entries--------------------------
# "." is passed as the variable selector; per the chunk label this selects
# all columns, here limited to rows 1:5.
dt.choose.cols(
  dt.name = "dat",
  the.variables = ".",
  the.filter = "1:5",
  return.as = "all"
)

## ----chose species and sepal length for first 3 entries-----------------------
# Two named columns for rows 1:3.
dt.choose.cols(
  dt.name = "dat",
  the.variables = c("Species", "Sepal.Length"),
  the.filter = "1:3",
  return.as = "all"
)

## ----chose first 2 rows of data for each species and print sepal length and sepal width for the flowers----
# first.k = 2 together with a grouping variable.
dt.choose.cols(
  dt.name = "dat",
  the.variables = c("Sepal.Length", "Sepal.Width"),
  grouping.variables = "Species",
  first.k = 2,
  return.as = "all"
)

## ----first2_per_group---------------------------------------------------------
# The dedicated helper, called with the same variables, grouping and k = 2.
dt.first.k.rows(
  dt.name = "dat",
  k = 2,
  the.variables = c("Sepal.Length", "Sepal.Width"),
  grouping.variables = "Species",
  return.as = "all"
)

## ----chose last 2 rows of data for each species and print sepal length and sepal width for the flowers----
# last.k = 2 together with a grouping variable.
dt.choose.cols(
  dt.name = "dat",
  the.variables = c("Sepal.Length", "Sepal.Width"),
  grouping.variables = "Species",
  last.k = 2,
  return.as = "all"
)

## ----dt.last.k.rows-----------------------------------------------------------
# The dedicated helper, called with the same variables, grouping and k = 2.
dt.last.k.rows(
  dt.name = "dat",
  k = 2,
  the.variables = c("Sepal.Length", "Sepal.Width"),
  grouping.variables = "Species",
  return.as = "all"
)

## ----row indices--------------------------------------------------------------
# Explicit row positions (3, 5 and 7) combined with a grouping variable.
dt.choose.cols(
  dt.name = "dat",
  the.variables = c("Sepal.Length", "Sepal.Width"),
  grouping.variables = "Species",
  row.indices = c(3, 5, 7),
  return.as = "all"
)

## ----Count the number of rows in the dataset iris-----------------------------
# Row count with no filter and no grouping.
dt.count.rows(
  dt.name = "dat",
  return.as = "all"
)

## ----Count number of rows where species is "Setosa"---------------------------
# Row count restricted by a filter on Species.
dt.count.rows(
  dt.name = "dat",
  the.filter = "Species == 'setosa'",
  return.as = "all"
)

## ----Count in subgroups-------------------------------------------------------
# Filtered row count with Species as the grouping variable.
dt.count.rows(
  dt.name = "dat",
  the.filter = "Petal.Length > 1.7",
  grouping.variables = "Species",
  return.as = "all"
)

## ----Count in subgroups with new name-----------------------------------------
# Same count, with a custom name passed through `count.name`.
dt.count.rows(
  dt.name = "dat",
  the.filter = "Petal.Length > 1.7",
  grouping.variables = "Species",
  count.name = "Total Qualifying Rows",
  return.as = "all"
)

## ----define category as a new column with iris as its value-------------------
# NOTE: the calls in this section are kept in their original order; the
# removal at the end refers to columns named in the earlier definitions.

# New column "Category" given as a literal value (specification = "by.value").
dt.define.variable(
  dt.name = "dat",
  variable.name = "Category",
  the.values = "Flower: Iris",
  return.as = "all",
  specification = "by.value"
)

## ----define max sepal length species variable---------------------------------
# New column defined by an expression string, with Species as the grouping
# variable and sortby.group = TRUE.
dt.define.variable(
  dt.name = "dat",
  variable.name = "Max_Sepal_Length_Species",
  the.values = "max(Sepal.Length)",
  specification = "by.expression",
  grouping.variables = "Species",
  sortby.group = TRUE,
  return.as = "all"
)

## ----define.variable.in.subset------------------------------------------------
# Expression-based column combined with a filter on Species.
dt.define.variable(
  dt.name = "dat",
  variable.name = "setosa_sl_below_5",
  the.values = "Sepal.Length < 5",
  specification = "by.expression",
  the.filter = "Species == 'setosa'",
  return.as = "all"
)

## ----removing category as a variable------------------------------------------
# Remove two of the columns named above.
dt.remove.variables(
  dt.name = "dat",
  the.variables = c("Category", "setosa_sl_below_5"),
  return.as = "all"
)

## ----sort species and sepal length in increasing order------------------------
# Sort by Species, then Sepal.Length, both increasing.
dt.sort(
  dt.name = "dat",
  sorting.variables = c("Species", "Sepal.Length"),
  sort.increasing = TRUE,
  return.as = "all"
)

## ----sort species and sepal length in decreasing order------------------------
# Same sorting variables, both decreasing.
dt.sort(
  dt.name = "dat",
  sorting.variables = c("Species", "Sepal.Length"),
  sort.increasing = FALSE,
  return.as = "all"
)

## ----species in increasing order and sepal length in decreasing order---------
# One direction per sorting variable. TRUE/FALSE are spelled out because the
# shorthands T and F are ordinary variables that can be reassigned.
dt.sort(
  dt.name = "dat",
  sorting.variables = c("Species", "Sepal.Length"),
  sort.increasing = c(TRUE, FALSE),
  return.as = "all"
)

## ----calculate_one_fn_one_variable--------------------------------------------
# One function applied to one variable.
dt.calculate(
  dt.name = "dat",
  the.functions = "mean",
  the.variables = "Sepal.Length",
  return.as = "all"
)

## ----calculate_multiple_fn----------------------------------------------------
# Several functions applied to the same variable.
dt.calculate(
  dt.name = "dat",
  the.functions = c("mean", "median", "sd"),
  the.variables = "Sepal.Length",
  return.as = "all"
)

## ----dt.calculate.parameters--------------------------------------------------
# Extra arguments are supplied as a string through `other.params`.
dt.calculate(
  dt.name = "dat",
  the.functions = c("mean", "median", "sd"),
  the.variables = "Sepal.Length",
  other.params = "na.rm = T",
  return.as = "all"
)

## ----dt.calculate.filter.group------------------------------------------------
# Filter plus grouping variable.
dt.calculate(
  dt.name = "dat",
  the.functions = c("mean", "median", "sd"),
  the.variables = "Sepal.Length",
  the.filter = "Sepal.Length > 3.5",
  grouping.variables = "Species",
  return.as = "all"
)

## ----dt.calculate.filter.group.mult.variables---------------------------------
# Same filter and grouping, now over all four measurement columns.
dt.calculate(
  dt.name = "dat",
  the.functions = c("mean", "median", "sd"),
  the.variables = c(
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
  ),
  the.filter = "Sepal.Length > 3.5",
  grouping.variables = "Species",
  return.as = "all"
)

## ----dt.calculate.filter.group.mult.variables.wide----------------------------
#dt.calculate(dt.name = "dat", the.functions = c("mean", "median", "sd"), the.variables = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"), the.filter = "Sepal.Length > 3.5", grouping.variables = "Species", table.format = "wide", return.as = "all")

## ----adding triple mean as a function in dt.calculate-------------------------
# Three times the arithmetic mean of `x`.
#
# The function name contains a space, so it has to be wrapped in backticks
# when it is defined or called directly.
#
# Args:
#   x:     Values handed to mean().
#   na.rm: Forwarded to mean(). Defaults to TRUE; the literal TRUE is used
#          because the shorthand T is an ordinary variable that can be
#          reassigned, which would silently change the default.
#
# Returns:
#   3 * mean(x, na.rm = na.rm).
`triple mean` <- function(x, na.rm = TRUE) {
  3 * mean(x = x, na.rm = na.rm)
}

# Built-in functions mixed with the user-defined `triple mean`, referred to
# by name; Species is the grouping variable and the table format is "long".
dt.calculate(
  dt.name = "dat",
  the.variables = c("Sepal.Length", "Sepal.Width"),
  the.functions = c("mean", "sd", "triple mean"),
  grouping.variables = "Species",
  table.format = "long",
  return.as = "all"
)

## ----rowSums------------------------------------------------------------------
# rowSums over the four measurement columns for rows 1:5, with
# individual.variables = FALSE.
dt.calculate(
  dt.name = "dat",
  the.functions = "rowSums",
  the.variables = c(
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
  ),
  the.filter = 1:5,
  individual.variables = FALSE,
  return.as = "all"
)

## ----get.lm.coefs-------------------------------------------------------------
# Fit a linear model and return its coefficient table as a data.table.
#
# Args:
#   data:    Data passed to lm() as its `data` argument.
#   formula: Model formula passed to lm().
#
# Returns:
#   A data.table built from the coefficient matrix of summary(lm(...)), with
#   the matrix row names stored in a column called "Variable".
#
# data.table functions are called through `data.table::` rather than loaded
# with require(): require() only returns FALSE when the package is missing
# (the failure would surface later as a confusing "could not find function"
# error) and it attaches the package to the caller's search path.
get.lm.coefs <- function(data, formula) {
  mod <- lm(formula = formula, data = data)
  the.coefs <- data.table::as.data.table(
    x = summary(mod)$coefficients,
    keep.rownames = TRUE
  )
  # keep.rownames = TRUE stores the row names in a column named "rn".
  data.table::setnames(x = the.coefs, old = "rn", new = "Variable")
  the.coefs
}

## ----linear regression using dt.calculate-------------------------------------
## linear regression
# get.lm.coefs is referred to by name with Species as the grouping variable;
# the model formula is passed as a string through `other.params`.
# FALSE is spelled out because the shorthand F is a reassignable variable.
dt.calculate(
  dt.name = "dat",
  the.functions = "get.lm.coefs",
  grouping.variables = "Species",
  other.params = "formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width",
  return.as = "all",
  individual.variables = FALSE,
  add.function.name = FALSE
)

Any scripts or data that you put into this service are public.

DTwrappers documentation built on June 21, 2021, 9:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DTwrappers
Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

inst/doc/Introduction_to_DTwrappers_Updated.R
In DTwrappers: Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

Try the DTwrappers package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DTwrappers Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

inst/doc/Introduction_to_DTwrappers_Updated.R In DTwrappers: Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

Try the DTwrappers package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DTwrappers
Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package

inst/doc/Introduction_to_DTwrappers_Updated.R
In DTwrappers: Simplified Data Analysis with Wrapper Functions for the 'Data.Table' Package