New Zealand · R

New Zealand Election Survey 2014 processing code

This is the code that lies under a (less technical) article on the NZES I wrote published on Public Address, in more or less the order it is used in the article

Data setup and organisation

#for all subsequent code chunks, this chunk is assumed to be run and the libraries loaded.
if(!require(haven)){
 install.packages("haven")
 require(haven)
}
if(!require(tidyr)){
 install.packages("tidyr")
 require(tidyr)
}
if(!require(ggplot2)){
 install.packages("ggplot2")
 require(ggplot2)
}
if(!require(dplyr)){
 install.packages("dplyr")
 require(dplyr)
}
if(!require(knitr)){
 install.packages("knitr")
 require(knitr)
}


#load in the spss file from whereever it happens to be saved
ds <- read_spss("~/Desktop/nzes2014/NZES2014GeneralReleaseApril16.sav")
#reduce the columns to those used in this article
#simplifying the columns and renaming them in the process
ds2 <- data.frame(Labour= as.integer(ds$dlablr),
 National= as.integer(ds$dnatlr),
 Green= as.integer(ds$dgrnlr),
 NZ_First= as.integer(ds$dnzflr),
 ACT= as.integer(ds$dactlr),
 United_Future= as.integer(ds$dunflr),
 Māori_Party = as.integer(ds$dmaolr),
 Mana = as.integer(ds$dmnplr),
 Internet = as.integer(ds$dintlr),
 Conservative= as.integer(ds$dconlr),
 Self_Identification= as.integer(ds$dslflr))

## make those code 99 NA
for (i in 1:ncol(ds2)){
 x <- ds2[,names(ds2)[i]]
 x[x>10] <- NA
 ds2[,names(ds2)[i]] <- as.integer(x)
}

##this function is needed as for some columns there are missing value labels so can't as_factor
applyValLabels <- function(column){
 knownvalues <- attributes(column)$labels
 valuelabels <- names(knownvalues)
 lookup <- data.frame(column = as.character(knownvalues), valuelabels, stringsAsFactors = FALSE)
 target <- data.frame(column = as.character(column), index=1:length(column), stringsAsFactors = FALSE)
 combined <- merge(target, lookup, all.x=TRUE)
 combined <- combined[order(combined$index),]
 output <- as.character(column)
 output[!(is.na(combined$valuelabels))] <- combined$valuelabels[!(is.na(combined$valuelabels))]
 return(output)
}

ds2$Party_Vote <- applyValLabels(ds[["dpartyvote"]])
#fix a few unicode issues, and spaces for later matches to column names
ds2$Party_Vote[ds2$Party_Vote == "M?ori Party"] <- "Māori_Party"
ds2$Party_Vote[ds2$Party_Vote == "Internet?Mana Party"] <- "Internet_Mana_Party"
ds2$Party_Vote[ds2$Party_Vote == "No Vote"] <- "No_Vote"
ds2$Party_Vote[ds2$Party_Vote == "NZ First"] <- "NZ_First"
ds2$Party_Vote[ds2$Party_Vote == "United Future"] <- "United_Future"
ds2$Party_Vote[ds2$Party_Vote == "Another party"] <- "Another_party"
ds2$Party_Vote[ds2$Party_Vote == "Democrats for Social Credit"] <- "Democrats_for_Social_Credit"
ds2$Party_Vote[ds2$Party_Vote == "Ban 1080"] <- "Ban_1080"

ds2$Best_Issue_Party <- applyValLabels(ds[["dissuedeal"]])
#fix a few unicode issues, and spaces for later matches to column names
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "M?ori Party"] <- "Māori_Party"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "Internet?Mana Party"] <- "Internet_Mana_Party"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "No Vote"] <- "No_Vote"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "NZ First"] <- "NZ_First"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "United Future"] <- "United_Future"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "Another party"] <- "Another_party"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "Democrats for Social Credit"] <- "Democrats_for_Social_Credit"
ds2$Best_Issue_Party[ds2$Best_Issue_Party == "Ban 1080"] <- "Ban_1080"

First graph

par(mar=c(5,7.6,4,2))
boxplot(ds2$Self_Identification ~ ds2$Party_Vote, frame.plot=FALSE, varwidth="T",
 horizontal=TRUE, las=2, cex.axis=0.6, border="white",
 xlab= "left <- 5 (centre) -> right",
 main="Voters self-ratings on left-right spectrum\nby party voted for")
abline(v=5, col="red", lwd=2)
abline(v=5, col="blue", lty=2, lwd=2)
boxplot(ds2$Self_Identification ~ ds2$Party_Vote, frame.plot=FALSE, varwidth="T",
 horizontal=TRUE, las=2, axes=FALSE, border="#333333", add=TRUE)
par(mar=c(5,4,4,2))

Second graph

ds2$Chosen_Party <- 99
ds2$Chosen_Party[which(ds2$Party_Vote == "National")] <- ds2$National[which(ds2$Party_Vote == "National")] 
ds2$Chosen_Party[which(ds2$Party_Vote == "Conservative")] <- ds2$Conservative[which(ds2$Party_Vote == "Conservative")] 
ds2$Chosen_Party[which(ds2$Party_Vote == "NZ_First")] <- ds2$NZ_First[which(ds2$Party_Vote == "NZ_First")] 
ds2$Chosen_Party[which(ds2$Party_Vote == "Māori_Party")] <- ds2$Māori_Party[which(ds2$Party_Vote == "Māori_Party")]
ds2$Chosen_Party[which(ds2$Party_Vote == "Labour")] <- ds2$Labour[which(ds2$Party_Vote == "Labour")] 
ds2$Chosen_Party[which(ds2$Party_Vote == "Green")] <- ds2$Green[which(ds2$Party_Vote == "Green")] 
#needed to drop internetmana as parties were asked about seperately

## dropping the 99s and the NAs
ds4 <- ds2[ds2$Chosen_Party < 11 & !is.na(ds2$Chosen_Party) & ds2$Self_Identification < 11 & !is.na(ds2$Self_Identification),]

for_graph <- c( "National","Conservative", "NZ_First", 
"Māori_Party", "Labour", "Green")
ds4 <- ds4[, c("Self_Identification", "Party_Vote", "Chosen_Party")]
ds4$Party_Vote <- factor(ds4$Party_Vote, levels = for_graph)
ds4 <- ds4 %>% gather(who, rating, Self_Identification, Chosen_Party)
ds4$who[ds4$who == "Self_Identification"] <- "Self_Rating"
ds4$who[ds4$who == "Chosen_Party"] <- "Chosen_Party"

par(mar=c(5,7.6,4,2))
boxplot(ds4$rating ~ ds4$who*ds4$Party_Vote , frame.plot=FALSE,
 horizontal=TRUE, las=2, cex.axis=0.6, border="#333333",
 xlab= "left <- 5 (centre) -> right",
 main="Voters and their chosen parties\nself-rated left/right scale", col=(c("grey","white")))
par(mar=c(5,4,4,2))

Scatter Graph

## dropping the 99s and the NAs
ds5 <- ds2[ds2$Chosen_Party < 11 & !is.na(ds2$Chosen_Party) & ds2$Self_Identification < 11 & !is.na(ds2$Self_Identification),]
for_graph <- c( "National","Conservative", "NZ_First", 
"Māori_Party", "Labour", "Green")
ds5 <- ds5[, c("Self_Identification", "Party_Vote", "Chosen_Party")]
ds5$Party_Vote <- factor(ds5$Party_Vote, levels = for_graph)
library(ggplot2)
ggplot(ds5, aes(x=Self_Identification, y=Chosen_Party)) + 
 geom_point(alpha=0.4, position=position_jitter(width=1,height=1)) + 
 geom_abline(intercept=0, slope=1, col="blue") +
 annotate("text", x = 1, y = 8, label = "Voter to\nleft\nof Party", size=3, colour="blue") +
 annotate("text", x = 9, y = 3, label = "Voter to\nright\nof Party", size=3, colour="blue") +
 facet_wrap( ~ Party_Vote, ncol=2)

Calculating closest party

### Now lets work out how many voters voted for a party when there was a closer one
##excluding Mana Internet as they seperate entries
isClosest <- function(x){
 parties <- names(x)[1:8]
 #normalise on selfLR
 norm <- abs(as.integer(x[1:8]) - as.integer(x[9]))
 isNear <- parties[norm == min(norm)]
 if (sum(isNear== x[10])==1) {
 return(1)
 } else {
 return(0)
 }
}

comparible <- c("Labour", "National", "Green", "NZ_First", "ACT",
 "United_Future","Māori_Party", "Conservative")
ds6 <- ds2[ds2$Party_Vote %in% comparible, c(comparible, "Self_Identification", "Party_Vote", "Best_Issue_Party")]
ds6 <- ds6[complete.cases(ds6),]
votlr <- apply(ds6,1,isClosest)
ds6$party_close <- votlr
closeparty <- ds6 %>% group_by(Party_Vote) %>% summarise(percentClose = round(100 *sum(party_close)/ n(), 2), number=n()) %>% arrange(desc(percentClose))
names(closeparty) <- c("Party", "% of voters closest", "Sample size")
kable(closeparty)

Calculating voters finding their party best on key issue

ds6$issue_got_vote <- as.numeric(ds6$Party_Vote ==ds6$Best_Issue_Party)
issueparty <- ds6 %>% group_by(Party_Vote) %>% summarise(percentIssue = round(100 *sum(issue_got_vote)/ n(), 2), number=n()) %>% arrange(desc(percentIssue))
names(issueparty) <- c("Party", "% voters think best", "Sample size")
kable(issueparty)

Election results linear regressions

votescompare <- ds6 %>% group_by(Party_Vote) %>% summarise(percentIssue = 100 *sum(issue_got_vote)/ n(), number=n(), percentClose = 100 *sum(party_close)/ n(), number=n()) 
votescompare$election <- c(0.69,3.97,10.70, 25.13, 1.32, 47.04, 8.66, 0.22)
summary(lm(election ~ percentIssue, votescompare))
summary(lm(election ~ percentClose, votescompare))
summary(lm(election ~ percentClose + percentIssue, votescompare))

Election results graph

votescompare %>% filter(Party_Vote != "ACT", Party_Vote != "United_Future") %>% arrange(percentIssue) %>%
 ggplot(aes(x=percentIssue, y=election)) + stat_smooth(method="lm") + geom_point() +
 ylab("Election Vote Percentage") + xlab("% of supporters that think their party is\nbest at most important issue")

Closeness and bestness combinations

setOfFour <- ds6 %>% group_by(Party_Vote) %>% 
 summarise(bestClose = round(100 *sum(issue_got_vote == 1 & party_close==1)/ n(), 2),
 closeNotBest = round(100 *sum(issue_got_vote == 0 & party_close==1)/ n(), 2),
 bestNotClose = round(100 *sum(issue_got_vote == 1 & party_close==0)/ n(), 2),
 notBestNotClose = round(100 *sum(issue_got_vote == 0 & party_close==0)/ n(), 2)) %>% 
 gather(metric,percent, bestClose:notBestNotClose) %>% select (metric,Party_Vote, percent) %>%
 arrange(metric, Party_Vote)

kable(setOfFour[setOfFour$metric == "bestClose",])

kable(setOfFour[setOfFour$metric == "bestNotClose",])

kable(setOfFour[setOfFour$metric == "closeNotBest",])

kable(setOfFour[setOfFour$metric == "notBestNotClose",])

 

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s