## Nothing
## Create the swissbanknotes dataset
##
## The data come from `banknote` in package alr3. Column names are
## lower-cased and the last column is renamed to "class".
library(alr3)
data("banknote")
swissbanknotes <- setNames(banknote, tolower(colnames(banknote)))
names(swissbanknotes)[length(swissbanknotes)] <- "class"
head(swissbanknotes)

## Shift the class codes up by one; the shifted codes are used as the
## plotting colours below.
swissbanknotes[["class"]] <- swissbanknotes[["class"]] + 1
head(swissbanknotes)

library(rrcov)
plot(CovClassic(swissbanknotes[, 1:6]), which = "pairs",
     col = swissbanknotes[["class"]])
## prompt(swissbanknotes)
save(list = "swissbanknotes", file = "swissbanknotes.rda", version = 2)

## Example
data("swissbanknotes")
head(swissbanknotes)
plot(CovClassic(swissbanknotes[, 1:6]), which = "pairs",
     col = swissbanknotes[["class"]])
##-------------------------------------------------------------------------
## Build the diabetes dataset from mclust's `diabetes` and compare its three
## measurement columns with the values read from "diabetes.txt".
df <- read.table("diabetes.txt")
head(df)
library(mclust)
data("diabetes")

## Reset the row names of both tables
rownames(df) <- NULL
rownames(diabetes) <- NULL

## sort by class: Normal first, then Chemical, then Overt
diabetes <- rbind(
  diabetes[which(diabetes[, 1] == "Normal"), ],
  diabetes[which(diabetes[, 1] == "Chemical"), ],
  diabetes[which(diabetes[, 1] == "Overt"), ]
)

## only one difference in insulin, observation 104
all.equal(diabetes[, 2:4], df, check.names = FALSE)
diabetes[, 2:4] - df

## Put the class column last
colnames(diabetes)
diabetes <- diabetes[, c("glucose", "insulin", "sspg", "class")]
save(list = "diabetes", file = "diabetes.rda", version = 2)
## The diabetes dataset, introduced by Reaven and Miller (1979),
## consists of 145 observations (patients). For each patient three
## measurements are reported: plasma glucose response to oral glucose,
## plasma insulin response to oral glucose, degree of insulin resistance.
##---------------------------------------------------------------
##
## Swiss Heads data
## The Swiss Heads data set was introduced by B. Flury and H. Riedwyl (1988).
## It contains information on six variables describing the dimensions of the
## heads of 200 twenty year old Swiss soldiers.
## Read the raw table, attach descriptive column names and save it.
swissheads <- read.table("head.txt")
names(swissheads) <- c(
  "minimal_frontal_breadth",
  "breadth_angulus_mandibulae",
  "true_facial_height",
  "length_glabella_nasi",
  "length_tragion_nasion",
  "length_tragion_gnathion"
)
head(swissheads)
save(list = "swissheads", file = "swissheads.rda", version = 2)
##---------------------------------------------------------------
##
## Emilia Romagna
##
## Column names for the 28 variables of the emilia2001 data.
## NOTE(review): "unemplyed" looks like a misspelling of "unemployed"; it is
## left untouched because it becomes a column name of the saved data set.
cols <- c(
  "less10", "more75", "single", "divorced", "widows", "graduates",
  "no_education", "employed", "unemplyed", "increase_popul", "migration",
  "birth_92_94", "fecundity", "houses", "houses_2WCs", "houses_heating",
  "TV", "cars", "luxury_cars", "hotels", "banking", "income",
  "income_tax_returns", "factories", "factories_more10",
  "factories_more50", "artisanal", "entrepreneurs"
)

## The data set of the municipalities in Emilia Romagna, introduced by
## Atkinson et al. (2004), contains 341 records for 341 municipalities
## of Emilia Romagna (an Italian region) for 28 demographic variables.
emilia2001 <- read.table("emilia2001.txt")
head(emilia2001)
dim(emilia2001)
names(emilia2001) <- cols
head(emilia2001)
save(list = "emilia2001", file = "emilia2001.rda", version = 2)

## Fit CovMcd() to the data and plot the result
mcd <- CovMcd(emilia2001)
plot(mcd)
##---------------------------------------------------------------
##
## Geyser data
##
## The data set is loaded from package tclust; the text-file import is kept
## below for reference only.
## geyser2 <- read.table("geyser2.txt")
library(tclust)
data("geyser2")
head(geyser2)
dim(geyser2)
save(list = "geyser2", file = "geyser2.rda", version = 2)
##---------------------------------------------------------------
##
## M5data data
##
## The data set is loaded from package tclust; the text-file import is kept
## below for reference only.
## M5data <- read.table("M5data.txt")
## colnames(M5data) <- c("X", "Y", "cluster")
library(tclust)
data("M5data")
head(M5data)
dim(M5data)
save(list = "M5data", file = "M5data.rda", version = 2)
##---------------------------------------------------------------
##
## Hawkins data
##
## Read the Hawkins data from a MATLAB .mat file and save it as a data frame
## whose columns are named X1, X2, ... with the last column named "y".
library(R.matlab)
hh <- readMat("hawkins.mat")
## First element of the MATLAB object holds the data values
hawkins <- as.data.frame(hh$hawkins[[1]])
## Fourth element of the MATLAB object; extracted but not used or saved below
note <- hh$hawkins[[4]]
## seq_len() rather than 1:n, so that a zero count gives an empty sequence
## instead of c(1, 0)
rownames(hawkins) <- seq_len(nrow(hawkins))
colnames(hawkins) <- c(paste0("X", seq_len(ncol(hawkins) - 1)), "y")
save(hawkins, file = "hawkins.rda", version = 2)
##---------------------------------------------------------------
##
## Flea data
##
## Read the flea-beetle measurements, name the columns, turn the species code
## into a labelled factor and save the result as `flea`.
df <- read.table("flea.txt")
head(df)
dim(df)
names(df) <- c("tars1", "tars2", "head", "aede1", "aede2", "aede3",
               "species")

## Species codes 1, 2, 3 are mapped to these labels, in this order
labels <- c("Concinna", "Heptapotamica", "Heikertingeri")
df[["species"]] <- factor(df[["species"]], levels = c(1, 2, 3),
                          labels = labels)

## convert the numerical variables to double - otherwise they will be
## integer, which causes trouble on the MATLAB side
df[, 1:6] <- matrix(as.double(as.matrix(df[, 1:6])), ncol = 6)

flea <- df
save(list = "flea", file = "flea.rda", version = 2)
## REFERENCE
## A. A. Lubischew (1962), "On the Use of Discriminant Functions in Taxonomy", Biometrics, Dec 1962, pp.455--477.
##
## JSTOR: https://www.jstor.org/stable/2527894
## Flea-beetle measurements
## tars1, width of the first joint of the first tarsus in microns (the sum of measurements for both tarsi)
## tars2, the same for the second joint
## head, the maximal width of the head between the external edges of the eyes in 0.01 mm
## aede1, the maximal width of the aedeagus in the fore-part in microns
## aede2, the front angle of the aedeagus (1 unit = 7.5 degrees)
## aede3, the aedeagus width from the side in microns
## species, which species is being examined - Concinna, Heptapotamica, Heikertingeri
##---------------------------------------------------------------
##
## Income1 data
##
## Read the Income1 table, name its four columns, store them as double and
## save the result as `Income1`.
df <- read.table("Income1.txt")
head(df)
dim(df)
names(df) <- c("H_NUMPER", "HOTHVAL", "HSSVAL", "HTOTVAL")

## convert the numerical variables to double - otherwise they will be
## integer, which causes trouble on the MATLAB side
df[, 1:4] <- matrix(as.double(as.matrix(df[, 1:4])), ncol = 4)

Income1 <- df
save(list = "Income1", file = "Income1.rda", version = 2)
## Source: United States Census Bureau, Annual Social and Economic Supplements (2021)
## https://www.census.gov/data/datasets/time-series/demo/cps/cps-asec.html
## REFERENCE
## Atkinson et al. (2023)
## Income data taken from the United States Census Bureau. The data are
## a random sample of 200 observations on the following variables.
## The goal is to predict HTOTVAL.
## H_NUMPER, Number of persons in household
## HOTHVAL, All other types of income except HEARNVAL Recode - Total other household income
## HSSVAL, household income - social security
## HTOTVAL, total household income (dollar amount).
##---------------------------------------------------------------
##
## Income2 data
##
## Read the Income2 table, name its six columns, store them as double, turn
## Gender into a labelled factor and save the result as `Income2`.
df <- read.table("Income2.txt")
head(df)
dim(df)
names(df) <- c("Age", "Education", "Gender", "ExtraGain", "Hours", "Income")

## convert the numerical variables to double - otherwise they will be
## integer, which causes trouble on the MATLAB side
df[, 1:6] <- matrix(as.double(as.matrix(df[, 1:6])), ncol = 6)

## No explicit levels are given, so the labels are assigned to the sorted
## distinct codes: the smaller code becomes "Female", the larger "Male".
df[["Gender"]] <- factor(df[["Gender"]], labels = c("Female", "Male"))

Income2 <- df
save(list = "Income2", file = "Income2.rda", version = 2)
## REFERENCE
## Atkinson et al. (2023)
## A sample of 200 observations of full time employees from a municipality
## in Northern Italy who have declared extra income from investment
## sources. The variables are as follows.
## The goal is to predict the income level from the individual's personal information.
## Age, Age of the person (the minimum is 19 and the maximum is 73).
## Education, Number of years of education (the minimum value of 5 is primary school, and the maximum value of 16 is a bachelor degree)
## Gender, 1 is Male and 0 is Female
## ExtraGain, Income from investment sources (profit-losses) apart from wages/salary
## Hours, total number of declared hours worked during the week. The minimum value is 35 and the maximum is 99.
## Income, total yearly income (Euro amount)
##================================================================
##
## Bank data
##
## Read the bank data from CSV, convert it to a numeric matrix of doubles and
## save the result as `bank_data`.
df <- read.csv("bank_data.csv")
head(df)
dim(df)

## convert the numerical variables to double - otherwise they will be
## integer, which causes trouble on the MATLAB side.
## NOTE(review): the result is a plain matrix without column names - confirm
## that dropping the CSV header names is intended.
df <- matrix(as.double(as.matrix(df)), ncol = ncol(df))

bank_data <- df
save(list = "bank_data", file = "bank_data.rda", version = 2)
## Description
#' There are 1949 univariate observations on the amount of money made from individual
#' personal banking customers over a year for an Italian bank. Because of the linking
#' of products, it is not straightforward for the bank to attribute the profit to
#' individual sources. The bank made a preliminary classification of its 700 products
#' into 48 macrocategories (macroservices). Among these 48 macrocategories, the 13
#' most important ones according to the bank are listed below and form our set of
#' explanatory variables. All explanatory variables are discrete, taking
#' values 0, 1, 2, ..., i.e. the number of services (inside each macroservice)
#' that each customer has signed up for: number of credit cards, number of
#' domestic direct debits, number of current accounts and so forth.
#'
#' The variables are:
#' - X1 = Personal loans,
#' - X2 = Financing and hire-purchase,
#' - X3 = Mortgages,
#' - X4 = Life insurance,
#' - X5 = Share account,
#' - X6 = Bond account,
#' - X7 = Current account,
#' - X8 = Salary deposits,
#' - X9 = Debit cards,
#' - X10 = Credit cards,
#' - X11 = Telephone banking,
#' - X12 = Domestic direct debits,
#' - X13 = Money transfers,
#' - y = Profit/loss.
## Source
#' Riani, M., Cerioli, A., Atkinson, A. C., and Perrotta, D. (2014d). Supplement
#' to Monitoring robust regression. doi:10.1214/14-EJS897SUPP.
## References
#' Riani, M., Cerioli, A., Atkinson, A. C., and Perrotta, D. (2014a). Monitoring robust
#' regression. Electronic Journal of Statistics, 8, 642-673.
##================================================================
##
## Hospital data
##
## Read the hospital data from CSV, convert it to a numeric matrix of doubles
## and save the result as `hospital`.
df <- read.csv("hospital.csv")
head(df)
dim(df)

## convert the numerical variables to double - otherwise they will be
## integer, which causes trouble on the MATLAB side.
## NOTE(review): the result is a plain matrix without column names - confirm
## that dropping the CSV header names is intended.
df <- matrix(as.double(as.matrix(df)), ncol = ncol(df))

hospital <- df
save(list = "hospital", file = "hospital.rda", version = 2)
## Description
#' Hospital data (Neter et al., 1996).
#'
#' Data on the logged survival time of 108 patients undergoing liver surgery,
#' together with four potential explanatory variables. Data are composed of
#' 54 observations plus other 54 observations, introduced to check the model
#' fitted to the first 54. Their comparison suggests there is no systematic
#' difference between the two sets. However by looking at some FS plots
#' (Riani and Atkinson, 2007), we conclude that these two groups are significantly
#' different.
#'
#' The variables are:
#' - X1
#' - X2
#' - X3
#' - X4
#' - y
## Source
#' J. NETER, M. H. KUTNER, C. J. NACHTSHEIM, W. WASSERMAN, Applied Linear Statistical Models (4th edition). McGraw-Hill, New York, 1996.
## References
#' A. C. ATKINSON, M. RIANI, Robust Diagnostic Regression Analysis. Springer-Verlag, New York, 2000.
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.