[This article was first published on Ripples, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Would love to get a post from you for KDnuggets (Gregory Piatetsky, KDnuggets President)
This is the code I wrote to do this experiment:
require(sqldf) require(googleVis) require(ggplot2) require(ggthemes) setwd("YOUR WORKING DIRECTORY HERE") #This line doen's work until you type a valid path #Data Avaliable in http://eca.knmi.nl/utils/downloadfile.php?file=download/ECA_blend_tg.zip files=list.files(pattern = "TG_STAID") results=data.frame(staid=character(0), trnd=numeric(0), nobs=numeric(0)) #Loop to calculate linear trends for (i in 1:length(files)) { table=read.table(files[i], header=TRUE, sep = ',', skip=20) table=table[table$Q_TG==0,] table$date=as.Date(as.character(table$DATE), "%Y%m%d") results=rbind(data.frame(staid=files[i], trnd=lm.fit (x = matrix(table$date), y = table$TG)$coefficients, nobs=nrow(table)), results) } write.csv(results, file="results.csv", row.names = FALSE)#Save your work results=read.csv(file="results.csv")#Read your work. You can start here if you stop your work further this line #Remove outliers results=results[!results$trnd %in% boxplot.stats(results$trnd, coef = 4)$out,] #Histogram hist(x=results$trnd, breaks = 50, col = "orange", border = "pink", freq=TRUE, xlim=c(-0.03, 0.03), ylim=c(0, 300), xlab="Historical trend of daily mean temperatures", ylab="Number of stations", main="Evolution of temperatures in Europe and the Mediterranean", sub="Source: European Climate Assessment & Dataset project") results$staid2=as.numeric(gsub("[^0-9]","",results$staid)) #To join results with geographical coordinates #Read table of geographical coordinates staids=read.table("http://www.ecad.eu/download/ECA_all_stations.txt", header=TRUE, sep = ',', skip=17) #Right tail of the distribution hotpoints=sqldf("SELECT a.staid, a.staid2, a.trnd, a.nobs, b.staname, b.lat, b.lon FROM results a INNER JOIN staids b ON (a.staid2=b.staid) WHERE TRND >= 0.02") #Convert degrees:minutes:seconds to decimal coordinates hotpoints=within(hotpoints, { dms=do.call(rbind, strsplit(as.character(LAT), ":")) lat=sign(as.numeric(dms[,1]))*(abs(as.numeric(dms[,1]))+(as.numeric(dms[,2]) + as.numeric(dms[,3])/60)/60);rm(dms) }) hotpoints=within(hotpoints, { dms=do.call(rbind, strsplit(as.character(LON), ":")) lon=sign(as.numeric(dms[,1]))*(abs(as.numeric(dms[,1]))+(as.numeric(dms[,2]) + as.numeric(dms[,3])/60)/60);rm(dms) }) #To make readable tha name of the station in the map hotpoints$staname=gsub("^\s+|\s+$", "", hotpoints$STANAME) #To prepare coordinates to gvis function hotpoints$LatLong=with(hotpoints, paste(lat, lon, sep=":")) #The amazing gvisMap function of googleVis package hotpointsMap=gvisMap(hotpoints, "LatLong" , "STANAME", options=list(showTip=TRUE, showLine=TRUE, enableScrollWheel=TRUE, mapType='terrain', useMapTypeControl=TRUE)) plot(hotpointsMap) #The plot one of this hot stations InAmenas=read.table("TG_STAID000312.txt", header=TRUE, sep = ',', skip=20) InAmenas=InAmenas[InAmenas$Q_TG==0,] InAmenas$date=as.Date(as.character(InAmenas$DATE), "%Y%m%d") ggplot(InAmenas, aes(date, TG)) + geom_line(color="red", alpha=0.8) + xlab("") + ylab("Mean temperature in 0.1C")+ ggtitle("Mean temperature in IN-AMENAS (ALGERIA) 1958- 1998")+ geom_smooth(method = "lm", se=FALSE, color="red", lwd=2)+ theme_economist(base_size = 20, base_family = "sans", horizontal = TRUE, dkpanel = FALSE, stata = FALSE)
To leave a comment for the author, please follow the link and comment on their blog: Ripples.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.