#Fix Legends (DONE) #Fix colors (DONE) #Make graphs less ugly #Africa sub-regions should be split into a panelled graph #Regional correlations should be a graph. Possible 2: Continents & Subregions #Better explanation on later sections, possibly re-order #Remove this library section
INDIVIDUAL VARIABLES
library(ggplot2) library(plotly)
Suicide is a complicated and delicate subject, influenced by a plethora of cultural, religious, and social factors. These factors can have a strong influence on reporting, which may partially explain the gapminder.org dataset's somewhat odd definition, "Mortality due to self-inflicted injury", which is notably missing any element of intent. In this section, we will focus purely on how the reported suicide rate is affected by income inequality, as measured by the Gini index.
The following graph shows suicide rate (per 100,000 people) as a function of the Gini index, broken down by continent:
# 2D version: suicide rate against Gini index, one panel per continent.
# Rows labelled "Seven seas (open ocean)" are left out of this figure.
data.melt.plot <- subset(data.melt, continent != "Seven seas (open ocean)")

plot <- ggplot(data = data.melt.plot) +
  geom_point(
    # `label` is not drawn by geom_point(); it is kept so the (currently
    # disabled) ggplotly() call below can pick it up for its tooltip.
    mapping = aes(x = gini, y = suicide, col = continent, label = country),
    # A constant transparency must be set outside aes(). Inside aes() the
    # value is treated as data and rescaled by the alpha scale, so the points
    # are not actually drawn at the requested 0.1.
    alpha = 0.1
  ) +
  ggtitle("Average Suicide Rate by gini Index") +
  xlab("Gini Index") +
  xlim(25, 65) +
  ylab("Suicide Rate Per 100,000 People") +
  ylim(0, 70) +
  guides(size = "none", alpha = "none") +
  scale_color_manual(values = region.color, na.translate = FALSE) +
  theme_dark()

plot + facet_wrap(~continent, nrow = 2, ncol = 3)
# ggplotly(plot)

# 3D version.
# Rounding is for hovertemplate readability. Makes points slightly stiff
# looking.
data.melt.avg.rounded <- data.melt.avg %>%
  dplyr::mutate_if(is.numeric, round)

# Hover text: continent, country, Gini and suicide rate, one per line.
hover.template <- paste("%{x}", "%{text}", "GINI:%{y}", "SR:%{z}", sep = "<br>")

fig <- plotly::plot_ly(
  data.melt.avg.rounded,
  x = ~continent, y = ~gini_avg, z = ~suicide_avg,
  color = ~continent, colors = region.color, alpha = .8,
  text = ~country,
  hovertemplate = hover.template
) %>%
  plotly::add_markers() %>%
  # Namespaced so the call cannot resolve to graphics::layout().
  plotly::layout(
    scene = list(
      xaxis = list(title = ""),
      yaxis = list(title = "gini index"),
      zaxis = list(title = "Suicide Rate")
    ),
    title = "Suicide Rate by GINI Index",
    showlegend = FALSE
  )
fig
At the global level, there does not appear to be a strong relationship between the suicide rate and income inequality. What is clear is that there are strong regional variations both in income inequality and suicide rate.
Gini and suicide rate are very weakly correlated in most continents, except in Africa. (The 'seven seas' continent in this case contains Seychelles and Mauritius, which fall into the East Africa subregion, and the Maldives, which fall under Asia. We will be including Seychelles and Mauritius in Africa for this section, for the sake of completeness.)
Before going further, we also need to address Lesotho. Lesotho is an interesting case study for this topic: it is a small, low income, medium inequality country completely surrounded by a large, higher income, higher inequality country. Its suicide rate (avg 61/100,000) is the highest in the world, almost 50% higher than the next highest country. It is, in fact, so high that it makes some smaller graphs difficult to read. Because of this, it will be omitted from here on.
# Correlation between `gini` and `suicide` within each group of `data`.
#
# data:      data frame with `gini`, `suicide` and the grouping column.
# group_col: name (string) of the grouping column, e.g. "continent".
#
# Returns a 2-row character matrix without dimnames: row 1 holds the group
# labels in order of first appearance, row 2 the correlation rounded to three
# decimals. Missing group labels are skipped, and a group with fewer than two
# complete (gini, suicide) pairs gets NA instead of stopping cor().
group_correlations <- function(data, group_col) {
  group_values <- as.character(data[[group_col]])
  groups <- unique(group_values)
  groups <- groups[!is.na(groups)]

  cors <- vapply(
    groups,
    function(g) {
      rows <- !is.na(group_values) & group_values == g
      x <- data[["gini"]][rows]
      y <- data[["suicide"]][rows]
      if (sum(stats::complete.cases(x, y)) < 2L) {
        return(NA_real_)
      }
      round(stats::cor(x, y, use = "pairwise.complete.obs"), 3)
    },
    numeric(1),
    USE.NAMES = FALSE
  )

  rbind(groups, as.character(cors), deparse.level = 0)
}

# Regional correlation values.
# The number of columns now follows the data instead of a hard-coded 7.
continents.mat <- group_correlations(data.melt, "continent")
prmatrix(
  continents.mat,
  rowlab = rep("", nrow(continents.mat)),
  collab = rep("", ncol(continents.mat))
)

# Subregional correlation values (one row per subregion once transposed).
subregions.mat <- group_correlations(data.melt, "subregion")
t(subregions.mat)
# Country-level averages for Africa. The "Seven seas (open ocean)" rows are
# folded into Africa, except the Maldives, which is dropped.
# Lesotho and Eswatini are NOT filtered here; points above the y limit below
# are simply not drawn (ggplot2 warns about removed rows).
data.melt.africa <- subset(
  data.melt.avg,
  country != "Maldives" &
    continent %in% c("Africa", "Seven seas (open ocean)")
)
data.melt.africa$continent <- "Africa"

# Average suicide rate against average Gini index, coloured by subregion.
plot <- ggplot(data = data.melt.africa) +
  geom_point(
    mapping = aes(x = gini_avg, y = suicide_avg, col = subregion)
  ) +
  ggtitle("Suicide rate by gini Index") +
  xlab("Gini Index") +
  xlim(25, 65) +
  ylab("Suicide Rate Per 100,000 People") +
  ylim(0, 45) +
  guides(size = "none", alpha = "none") +
  scale_color_manual(values = region.color, na.translate = FALSE)

# One panel per subregion.
plot + facet_wrap(~subregion, nrow = 2, ncol = 3)
When broken down by subregion, we can see that the general correlation between increased income inequality and increased suicide rate holds, except in Southern Africa. The following graph, this time using income instead of inequality, may point to a possible answer:
# Country-level averages for Africa. The "Seven seas (open ocean)" rows are
# folded into Africa, except the Maldives, which is dropped.
# Lesotho and Eswatini are NOT filtered here; points above the y limit below
# are simply not drawn (ggplot2 warns about removed rows).
data.melt.africa <- subset(
  data.melt.avg,
  country != "Maldives" &
    continent %in% c("Africa", "Seven seas (open ocean)")
)
data.melt.africa$continent <- "Africa"

# Average suicide rate against average income, coloured by subregion.
# The x axis is left unrestricted so no country is clipped by income.
plot <- ggplot(data = data.melt.africa) +
  geom_point(
    mapping = aes(x = income_avg, y = suicide_avg, col = subregion)
  ) +
  ggtitle("Suicide rate by Income") +
  xlab("Income") +
  ylab("Suicide Rate Per 100,000 People") +
  ylim(0, 45) +
  guides(size = "none", alpha = "none") +
  scale_color_manual(values = region.color, na.translate = FALSE)

# One panel per subregion.
plot + facet_wrap(~subregion, nrow = 2, ncol = 3)
# Interactive world-level scatter: suicide rate against Gini index, coloured
# by continent. No rows are filtered; points outside the axis limits below
# are not drawn.
# NOTE(review): the title says "grouped by Income" but nothing in this plot
# uses income -- confirm the intended title or grouping.
plot <- ggplot(data = data.melt) +
  geom_point(
    # `label` is not drawn by geom_point(); it is mapped so ggplotly() can
    # show the country in the hover tooltip.
    mapping = aes(x = gini, y = suicide, col = continent, label = country),
    # Constant transparency is set outside aes() so it is used as-is rather
    # than being rescaled by the alpha scale.
    alpha = 0.3
  ) +
  ggtitle("Suicide rate by gini Index, grouped by Income") +
  xlab("Gini Index") +
  xlim(25, 65) +
  ylab("Suicide Rate Per 100,000 People") +
  ylim(0, 40) +
  guides(size = "none", alpha = "none") +
  scale_color_manual(values = region.color, na.translate = FALSE) +
  theme_dark()

ggplotly(plot)
To loop back around to the world as a whole, this is the original graph with Lesotho/Eswatini removed. Although relationships
# Sets variables used, category names, and scaling.
# Each series is multiplied by a fixed factor (named in its label) so that
# all four can be drawn against the same 0-100 y axis.
series <- list(
  "Poverty" = data.melt.avg$poverty_avg,
  "Education (2/3)" = data.melt.avg$education_avg * .66,
  "Suicides per 100,000 (x2)" = data.melt.avg$suicide_avg * 2,
  "Average Babies per Woman (x10)" = data.melt.avg$babies_per_woman_avg * 10
)

# Long format: one row per (country, series). Built as a data frame directly
# so numeric columns never pass through a character matrix (which loses
# precision and, with factor defaults, can turn values into factor codes).
data.cat <- do.call(
  rbind,
  lapply(names(series), function(type) {
    data.frame(
      country = data.melt.avg$country,
      gini = as.numeric(data.melt.avg$gini_avg),
      values = as.numeric(series[[type]]),
      type = type,
      stringsAsFactors = FALSE
    )
  })
)

# Values are plotted on the hand-scaled units above; no z-score
# standardisation is applied.
plot <- ggplot2::ggplot(data = data.cat) +
  ggplot2::geom_point(
    mapping = ggplot2::aes(x = gini, y = values),
    # Constant transparency is set outside aes() so it is used as-is.
    alpha = 0.3
  ) +
  ggplot2::ggtitle("Individual Variables by GINI Index") +
  ggplot2::xlab("GINI Index") +
  ggplot2::xlim(25, 75) +
  ggplot2::ylab("Adjusted Values") +
  ggplot2::ylim(0, 100) +
  ggplot2::guides(size = "none", alpha = "none") +
  ggplot2::theme_minimal()

# One panel per series.
plot + ggplot2::facet_wrap(~type, nrow = 2, ncol = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.