Smoking: Cigarette consumption and life expectancy by country

Description Usage Format Details Source Examples

Description

The variables for this data set were assembled from various web sources. They consist of life expectancy, cigarette consumption per capita, and health expenditure per capita in 2004. The relationship between life expectancy and cigarette consumption is paradoxical, leading to reflection on omitted variables and on problems of causal inference with observational and aggregated data. The association between life expectancy and cigarette consumption could be termed an 'ecological correlation'.

Usage

data(Smoking)

Format

A data frame with 192 observations on the following 10 variables.

Country

a factor with 192 levels Afghanistan Albania Algeria Andorra ...

Continent

a factor with 6 levels Africa Asia Australia Europe North America South America

LE

Life expectancy (combining sexes – separate would be much more interesting), a numeric vector

CigCon

Annual cigarette consumption per capita, a numeric vector

LE.q

Life expectancy quartile, a factor with levels (28.6,40.2] (40.2,51.8] (51.8,63.4] (63.4,75]

Cont

short labels for Continent, a factor with levels Afrc Asia Astr Eurp NrtA SthA

Cont2

short labels for Continent, a factor with levels Africa Asia Australia Europe N.America S.America

HealthExpPC

Health expenditure per capita in US $, a numeric vector

Year

relevant year – all 2004, a numeric vector

HE

Health expenditure quartile, a factor with levels (152,476] (31.1,152] (476,6.1e+03] [0.3,31.1]

Details

Each row consists of the data for one country.

Source

http://www.nationmaster.com

Examples

# Load the data set and show its structure.
data(Smoking)
str(Smoking)

# Two nested linear models for life expectancy: cigarette consumption
# alone, then cigarette consumption plus health expenditure per capita.
fit1 <- lm(LE ~ CigCon, data = Smoking)
fit2 <- lm(LE ~ CigCon + HealthExpPC, data = Smoking)

# Coefficients of the larger model, then an F-test comparing the two fits.
summary(fit2)
anova(fit1, fit2)

## Not run: 
    # NOTE(review): presumably marked "Not run" because the *3d calls need
    # an interactive graphics device -- confirm against the p3d docs.
    head(Smoking)

    # Use country names as row names (presumably so that Identify3d()
    # labels points by country -- confirm).
    rownames(Smoking) <- Smoking$Country

    # 3D view of life expectancy against cigarette consumption and health
    # expenditure, conditioned on Continent.
    Init3d(family = "serif", cex = 1.5)
    Plot3d(LE ~ CigCon + HealthExpPC | Continent, Smoking)
    Axes3d()
    Identify3d(pad = 1)

    # Full quadratic surface in CigCon and log(HealthExpPC) ...
    fit <- lm(
      LE ~ CigCon + log(HealthExpPC) + I(CigCon^2) +
        I(log(HealthExpPC)^2) + I(CigCon * log(HealthExpPC)),
      Smoking
    )
    Fit3d(fit)

    # ... compared with a plane in the untransformed predictors.
    fitl <- lm(LE ~ CigCon + HealthExpPC, Smoking)
    Fit3d(fitl, col = "pink")

    # HealthExpPC is highly 'skewed': dense on left, long tail on right
    library(lattice)
    densityplot(Smoking$HealthExpPC)

    # Useful to use a transformation to make spread more even
    #  e.g. log
    # First make sure all values are positive:

    sort(Smoking$HealthExpPC)

    # Do log transformation:

    Smoking$LogHE <- log(Smoking$HealthExpPC)    # create log HE

    densityplot(Smoking$LogHE)

    # Also useful to have categories:

    Smoking$HECat <- cut(Smoking$LogHE, 7)       # 7 equal-width categories
    summary(Smoking)

    # Condition on the category of log health expenditure
    Plot3d(LE ~ CigCon + LogHE | HECat, Smoking)
    Axes3d()
    Ell3d()
    Identify3d(pad = 1)

    # Simple regression

    fit.lin <- lm(LE ~ CigCon, Smoking)
    Fit3d(fit.lin)
    fit.lin2 <- lm(LE ~ CigCon, subset(Smoking, Continent != "Africa"))
    Fit3d(fit.lin2)
    Pop3d(4)    # pop twice for each fit (two fits were drawn above)

    # Use multiple regression (advanced version with quadratic surface)
    # The surface needs the linear LogHE term as well as the squares and
    # the cross-product; a repeated I(LogHE^2) would be silently dropped
    # by the formula, leaving the linear term out of the model.

    fit <- lm(
      LE ~ CigCon + LogHE + I(CigCon^2) + I(LogHE^2) + I(CigCon * LogHE),
      Smoking
    )
    Fit3d(fit, col = "red")
    Pop3d(2)

    # refit omitting Africa:

    fit.na <- lm(
      LE ~ CigCon + LogHE + I(CigCon^2) + I(LogHE^2) + I(CigCon * LogHE),
      Smoking,
      subset = Continent != "Africa"
    )
    Fit3d(fit.na, col = "red")

    # Marginal relationship

    # NOTE(review): only one fit (fit.na) has been drawn since the last
    # Pop3d(); popping 4 presumably also removes two earlier objects --
    # confirm this is intended.
    Pop3d(4)    # pop twice for each fit
    fit.quad <- lm(LE ~ CigCon + I(CigCon^2), Smoking)
    Fit3d(fit.quad, col = "green")

    #  A quadratic surface within each Continent (overfitting?!)

    fit2 <- lm(
      LE ~ Continent * (CigCon + HealthExpPC + I(CigCon^2) +
        I(HealthExpPC^2) + I(CigCon * HealthExpPC)),
      Smoking
    )
    Fit3d(fit2)

## End(Not run)

p3d documentation built on May 2, 2019, 5:25 p.m.