Guardian Words: Visualized
Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't.
Andy Kirk (@visualisingdata) & Lynn Cherny (@arnicas) tweeted about the Guardian Word Count service/archive site, lamenting the lack of visualizations:
Want to know num of words written in each day's Guardian paper by section + approx reading time? http://t.co/wP4W1EzUsx via @bengoldacre
— Andy Kirk (@visualisingdata) March 15, 2014
This gave me a chance to bust out another Shiny app over on our Data Driven Security shiny server:
I used my trusty “Google-Drive-spreadsheet-IMPORTHTML-to-CSV” workflow (you can access the automagically updated data here) to make the CSV that updates daily on the site and is referenced by the Shiny/R code.
The code has been gist-ified, and I’ll be re-visiting it to refactor the data.frame creation bits and add some more charts as the data set gets larger.
library(shiny)
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)

# Read the Guardian word-count CSV and normalise its column types.
#
# url: location of the CSV; defaults to the daily-updated copy used by the app.
#
# Returns a data.frame in which:
#   * Date has been parsed from month/day/year text into a Date, and
#   * Reading.time has been parsed by as.difftime() (default format) and
#     re-expressed as a plain number of hours.
# All other columns are left exactly as read.csv() produced them.
load_guardian <- function(url = "http://dds.ec/data/guardian.csv") {
  guardian <- read.csv(url, stringsAsFactors = FALSE)
  guardian$Date <- as.Date(guardian$Date, format = "%m/%d/%Y")
  guardian$Reading.time <- as.numeric(
    as.difftime(guardian$Reading.time, units = "secs"),
    units = "hours"
  )
  guardian
}

# Server logic: one stacked two-chart plot ("countPlot") and one data table
# ("guardianTable"), both fed from the same CSV via load_guardian().
shinyServer(function(input, output) {

  # Top chart: word count per day as a line with day-coloured points.
  # Bottom chart: box plot of the word count distribution per weekday.
  output$countPlot <- renderPlot({

    guardian <- load_guardian()

    # Fix the weekday order so the legend and box plot run Sunday..Saturday
    # instead of alphabetically.
    guardian$Day <- factor(
      guardian$Day,
      levels = c("Sunday", "Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday")
    )

    # Both charts share a y axis that starts at zero.
    word_limits <- c(0, max(guardian$Words))

    gg <- ggplot(data = guardian)
    gg <- gg + geom_path(aes(x = Date, y = Words), size = 0.5)
    gg <- gg + geom_point(aes(x = Date, y = Words, color = Day), size = 3)
    gg <- gg + scale_y_continuous(limits = word_limits, labels = comma)
    gg <- gg + labs(x = "", y = "# words", title = "Word Count (per day)")
    gg <- gg + theme_bw()

    gg2 <- ggplot(data = guardian, aes(Day, Words))
    gg2 <- gg2 + geom_boxplot(aes(fill = Day))
    gg2 <- gg2 + scale_y_continuous(limits = word_limits, labels = comma)
    gg2 <- gg2 + labs(x = "", y = "# words",
                      title = "Word Count Distribution by Day of Week")
    gg2 <- gg2 + theme_bw()
    gg2 <- gg2 + theme(legend.position = "none")

    # grid.arrange() draws onto the current device itself, so no print() is
    # needed around it.
    grid.arrange(gg, gg2, ncol = 1)

  })

  # Raw data tab: the same data with presentation-friendly column headers.
  output$guardianTable <- renderDataTable({

    guardian <- load_guardian()

    # Make the headers pretty. This relies on the CSV's column order being
    # Date, Day, Words, Reading.time.
    colnames(guardian) <- c("Date", "Day", "Word Count", "Reading Time (Hours)")

    guardian

  })

})
library(shiny)

# UI definition: a header, a data-source note, and three tabs -- the charts
# ("countPlot"), the raw data table ("guardianTable"), and the embedded tweet
# that inspired the app -- followed by an author credit.
shinyUI(basicPage(
  headerPanel("Guardian Words"),
  mainPanel(
    HTML("Data from <a href='http://gu-word-count.appspot.com/archive'>http://gu-word-count.appspot.com/archive</a>. Charts/data update daily.<br/><br/>"),
    tabsetPanel(
      tabPanel("Vis", plotOutput("countPlot")),
      tabPanel("Data", dataTableOutput("guardianTable")),
      # The tweet markup lives in a single-quoted R string, so the apostrophe
      # in "day's" must be written as the HTML entity &#39; -- a raw ' would
      # terminate the string literal early.
      tabPanel("Inspired by", HTML('<blockquote class="twitter-tweet" lang="en"><p>Want to know num of words written in each day&#39;s Guardian paper by section + approx reading time? <a href="http://t.co/wP4W1EzUsx">http://t.co/wP4W1EzUsx</a> via <a href="https://twitter.com/bengoldacre">@bengoldacre</a></p>— Andy Kirk (@visualisingdata) <a href="https://twitter.com/visualisingdata/statuses/444772698053693440">March 15, 2014</a></blockquote>
<script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script>'))
    ),
    HTML('<hr noshade size="1"/>By <a href="http://twitter.com/hrbrmstr">@hrbrmstr</a>')
  )
))
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't.