File:NZ opinion polls 2005-2008 new.png

NZ_opinion_polls_2005-2008_new.png(778 × 487 pixels, file size: 72 KB, MIME type: image/png)

Summary

Description
English: Graph showing support for political parties in New Zealand since the 2005 election, according to various political polls. Data is obtained from the Wikipedia page, Opinion_polling_for_the_New_Zealand_general_election,_2008
Date
Source Own work
Author Mark Payne, Denmark
 
This chart was created with R.

The figure is produced with the R statistical package, using the following code. The code first reads the HTML directly from the website, then parses the data and saves the graph into your working directory. Anyone with R should be able to run it directly.

#Download the Wikipedia page and cut out the opinion poll table, one list element per row.
#Note: the workspace is deliberately NOT cleared here, so running this script does not
#destroy objects that already exist in the session.

#Load the complete HTML file into memory. The connection is created and closed
#explicitly so that no other connection open in the session is affected
page.con <- url("http://en.wikipedia.org/wiki/Opinion_polling_for_the_New_Zealand_general_election,_2008")
html <- tryCatch(readLines(page.con,encoding="UTF-8"),finally=close(page.con))
#The third table is the opinion poll data. Fail with a clear message if it is not there
tbl.start <- grep("<table.*",html)
tbl.end   <- grep("</table.*",html)
if(length(tbl.start)<3 || length(tbl.end)<3) {
  stop("Expected at least three tables in the downloaded page; the page layout may have changed",call.=FALSE)
}
tbl <- html[tbl.start[3]:tbl.end[3]]
#Now split it into the rows, based on the <tr> tag. Only lines that are exactly "<tr>"
#start a new row; any lines before the first such line end up in a group named "0"
tbl.rows <- split(tbl,cumsum(tbl=="<tr>"))
#Now extract the data. Each element of tbl.rows holds the HTML lines of one table row.
#The result is a list of one-row data frames: Company, Date (mean of the survey dates)
#and eight numeric columns holding the values of table columns 3-10
survey.dat <- lapply(tbl.rows,function(x) {
  #Start by only considering where we have <td> tags
  td.tags <- x[grep("<td",x)]
  #Polling data appears in columns 3-10 (rows with fewer cells give NA here)
  dat     <- td.tags[3:10]
  #Now strip the markup and convert to numeric format. Every "-" is recoded as "0"
  dat     <- gsub("<td>|</td>","",dat)
  dat     <- gsub("%","",dat)
  dat     <- gsub("-","0",dat)
  dat     <- as.numeric(dat)
  #Getting the date strings is a little harder. The approach we will take is to take advantage
  #of the title="date" hyperlinks to generate a set of dates
  date.str <- td.tags[2]                        #Dates are in the second column
  date.str <- gsub("<sup.*</sup>","",date.str)   #Throw out anything between superscript tags, as it is a reference to the source
  titles <- gregexpr("(?U)title=\".*\"",date.str,perl=TRUE)[[1]]    #Find the location of the title tags
  #gregexpr reports "no match" as -1 (and NA for a missing cell), so keep real matches only
  found  <- !is.na(titles) & titles>0
  starts <- titles[found]
  lens   <- attr(titles,"match.length")[found]
  #Now, extract the actual date strings: drop the leading title=" (7 characters)
  #and the closing quote from every match
  date.strings <- if(any(found)) substring(date.str,starts+7,starts+lens-2) else character(0)
  #The last title is used as the year; every earlier title is combined with it and
  #parsed as "Month day year". The survey time is the mean of the parsed dates.
  #Cells with fewer than two titles cannot be parsed this way and get an NA date
  n.dates <- length(date.strings)
  if(n.dates>=2) {
    yr    <- date.strings[n.dates]
    dates <- as.POSIXct(strptime(paste(date.strings[-n.dates],yr),"%B %d %Y"))
    survey.time <- mean(dates)
  } else {
    survey.time <- as.POSIXct(NA)
  }
  #Get the name of the survey company too (first column)
  survey.comp <- td.tags[1]
  survey.comp <- gsub("<sup.*</sup>","",survey.comp)
  survey.comp <- gsub("<td>|</td>","",survey.comp)
  #Replace the literal text <U+2013> with a hyphen (presumably an escaped en dash - confirm)
  survey.comp <- gsub("<U+2013>","-",survey.comp,fixed=TRUE)
  #Remove any remaining tags
  survey.comp <- gsub("(?U)<.*>","",survey.comp,perl=TRUE)
  #And now return results
  data.frame(Company=survey.comp,Date=survey.time,t(dat))
})

#Stack the one-row data frames into a single table, then label its columns
surveys <- do.call("rbind",survey.dat)
names(surveys) <- c("Company","Date",
                    "Labour","National","NZ First","Maori Party",
                    "Greens","ACT","United Future","Progressive")

#Draw the poll results with loess smoothers and save them as a PNG in the working directory

#Restrict plot (manually) to parties which have been over 5%
parties <- c("Greens","Labour","National","NZ First")
cols    <- c("darkgreen","red","blue","black")
polls   <- surveys[,c("Company","Date",parties)]
#Drop rows without a usable date, then sort by time
polls <- polls[!is.na(polls$Date),]
polls <- polls[order(polls$Date),]
#Work with dates as plain numbers (seconds) from here on
polls$Date  <- as.double(polls$Date)
#Axis ticks: 1 Sep 2005, then 1 Jan / 1 May / 1 Sep of 2006-2008
ticks <- ISOdate(c(2005,rep(2006,3),rep(2007,3),rep(2008,3)),c(9,rep(c(1,5,9),3)),1)
#Extend the x-range to April 2009 so there is room for the labels at the right
xlims <- range(as.double(c(ticks,ISOdate(2009,4,1))))
png("NZ_opinion_polls_2005-2008 -parties.png",width=778,height=487,pointsize=16)
par(mar=c(3,4,1,1))
#Set up an empty plot (pch=NA); points are added last so they sit on top
matplot(polls$Date,polls[,parties],pch=NA,xlim=xlims,ylab="Party support (%)",xlab="",col=cols,xaxt="n",ylim=c(0,60))
abline(h=seq(0,95,by=5),col="lightgrey",lty=3)
abline(v=as.double(ticks),col="lightgrey",lty=3)
#Now add loess smoothers. loess drops rows with a missing response, so each fit is
#stored together with the dates it was actually fitted on; this keeps the x and y
#vectors the same length even when a party has missing values
smoothed <- vector("list",length(parties))
names(smoothed) <- parties
for(i in seq_along(parties)) {
  observed   <- !is.na(polls[,parties[i]])
  poll.date  <- polls[observed,"Date"]
  poll.share <- polls[observed,parties[i]]
  smoother <- loess(poll.share ~ poll.date,span=0.25)
  smoothed[[i]] <- predict(smoother,se=TRUE)
  smoothed[[i]]$date <- poll.date
  #Grey band: fit plus/minus 1.96 standard errors
  polygon(c(poll.date,rev(poll.date)),
    c(smoothed[[i]]$fit+smoothed[[i]]$se.fit*1.96,rev(smoothed[[i]]$fit-smoothed[[i]]$se.fit*1.96)),
    col=rgb(0.5,0.5,0.5,0.5),border=NA)
}
#Draw the fitted lines after all bands, so no band covers a line
for(i in seq_along(parties)) {
  lines(smoothed[[i]]$date,smoothed[[i]]$fit,col=cols[i],lwd=2)
}
matpoints(polls$Date,polls[,parties],pch=20,col=cols)
legend("topleft",legend=parties,col=cols,pch=20,bg="white",lwd=2)
axis(1,at=as.double(ticks),labels=format(ticks,format="%b\n%Y"),cex.axis=0.8)
#Unlabelled tick marks on the right-hand axis
axis(4,at=axTicks(4),labels=rep("",length(axTicks(4))))
#Add best estimates: the last fitted value of each smoother and its 1.96*SE half-width
for(i in seq_along(smoothed)) {
  n.fit <- length(smoothed[[i]]$fit)
  lbl <- sprintf("%4.1f%% ± %2.1f",round(smoothed[[i]]$fit[n.fit],1),round(1.96*smoothed[[i]]$se.fit[n.fit],1))
  text(smoothed[[i]]$date[n.fit],smoothed[[i]]$fit[n.fit],labels=lbl,pos=4,col=cols[i])
}
dev.off()

#As a cross validation, print the rows where there are NAs.
#complete.cases tests each column in its own type, instead of coercing the whole
#data frame to a character matrix as apply() would
checks <- subset(surveys,!complete.cases(surveys))
print(checks)

Licensing

I, the copyright holder of this work, hereby publish it under the following licenses:
w:en:Creative Commons
attribution share alike
This file is licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
  • share alike – If you remix, transform, or build upon the material, you must distribute your contributions under the same or compatible license as the original.
GNU head Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled GNU Free Documentation License.
You may select the license of your choice.

Captions

Add a one-line explanation of what this file represents

Items portrayed in this file

depicts

4 October 2008

File history

Click on a date/time to view the file as it appeared at that time.

(newest | oldest) View (newer 10 | ) (10 | 20 | 50 | 100 | 250 | 500)
Date/TimeThumbnailDimensionsUserComment
current19:45, 26 September 2016Thumbnail for version as of 19:45, 26 September 2016778 × 487 (72 KB)CmdrjamesonCompressed with pngout. Reduced by 47kB (39% decrease).
02:35, 7 November 2008Thumbnail for version as of 02:35, 7 November 2008778 × 487 (120 KB)Avenue Add latest Roy Morgan poll
21:38, 6 November 2008Thumbnail for version as of 21:38, 6 November 2008778 × 487 (120 KB)Avenue Updated to include latest Fairfax-Nielsen poll
18:36, 6 November 2008Thumbnail for version as of 18:36, 6 November 2008778 × 487 (120 KB)Avenue
08:51, 6 November 2008Thumbnail for version as of 08:51, 6 November 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date= |Author= |Permission= |other_versions= }}
07:56, 28 October 2008Thumbnail for version as of 07:56, 28 October 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date= |Author= |Permission= |other_versions= }}
08:29, 24 October 2008Thumbnail for version as of 08:29, 24 October 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date= |Author= |Permission= |other_versions= }}
07:28, 24 October 2008Thumbnail for version as of 07:28, 24 October 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date= |Author= |Permission= |other_versions= }}
07:15, 20 October 2008Thumbnail for version as of 07:15, 20 October 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date= |Author= |Permission= |other_versions= }}
09:49, 18 October 2008Thumbnail for version as of 09:49, 18 October 2008778 × 487 (14 KB)Trevva{{Information |Description= |Source= |Date=18 October 2008 |Author= |Permission= |other_versions= }}
(newest | oldest) View (newer 10 | ) (10 | 20 | 50 | 100 | 250 | 500)
The following pages on the English Wikipedia use this file (pages on other projects are not listed):