R: Summarizing meteorological data

9 February, 2011 (11:39) | R | By: Manuel Gimond


Download data for this exercise here: Waterville meteorological data for 2010



# Load ggplot2 library
library(ggplot2)

# Load the 2010 Waterville observations and rebuild the table with a parsed
# date/time column in place of the original text column.
# "NaN" and "M" entries in the CSV are read as NA.
met.raw = read.csv("waterville_2010.csv",
                   na.strings = c("NaN", "M"), header = TRUE)
# Parse the date/time text. The timestamps are in standard time (no DST),
# hence the "EST" time zone.
met.date = strptime(met.raw$Date,
                    format = "%m/%d/%Y %H:%M", tz = "EST")
# strptime() returns NA for text it cannot parse; report that instead of
# letting NA dates flow silently into the month/hour groupings.
if (any(is.na(met.date))) {
  warning(sum(is.na(met.date)),
          " date/time value(s) could not be parsed and are NA",
          call. = FALSE)
}
# Remove the original date/time column (the first column). drop = FALSE keeps
# a data frame even if only one other column remains.
met.1 = met.raw[, -1, drop = FALSE]
# Prepend the parsed date/time to the remaining columns (defines met)
met = data.frame(date = met.date, met.1)
#
# Plot monthly statistics: one temperature boxplot per month, drawn over the
# jittered individual observations.
#
# Month number of each date/time (POSIXlt counts months from 0, hence + 1)
month.year = function(x) {
  as.POSIXlt(x)$mon + 1
}
month = month.year(met$date)
# Convert to abbreviated month names, with levels ordered Jan..Dec
Month = factor(month, levels = 1:12, labels = month.abb)
met.month = cbind(Month, met)

# NOTE(review): the y aesthetic was OutTemp in the original, while the colour
# mapping of this same plot and both hourly plots use Temp. Temp is used here
# so the plot depends on a single temperature column - confirm the column name
# against the CSV header.
p1 = ggplot(met.month, aes(Month, Temp))
# Points are jittered horizontally only and coloured by whether the reading
# is below 32; the boxplots are drawn semi-transparent on top of them.
p2 = p1 +
  geom_jitter(position = position_jitter(width = 0.15, height = 0),
              alpha = 0.1, aes(colour = (Temp < 32))) +
  scale_colour_manual(values = c("#ff9999", "#9999ff")) +
  geom_boxplot(fill = "#dddddd", alpha = 0.5,
               outlier.size = 1, outlier.colour = "#BBBBBB",
               colour = "#666666") +
  scale_y_continuous(name = "Temperature (°F)") +
  ggtitle("Waterville Meteorological Data Summary for 2010") +
  theme(legend.position = "none")

# Plot hourly temperatures: one temperature boxplot per hour of the day,
# drawn over the jittered individual observations.
#
# Hour of each date/time plus one (POSIXlt hours start at 0, so the groups
# start at 1)
hour.day = function(x) {
  as.POSIXlt(x)$hour + 1
}
hour = hour.day(met$date)
met.hour = cbind(hour, met)
#
# hour is numeric, so it is converted to a factor to get one box per hour
l1 = ggplot(met.hour, aes(as.factor(hour), Temp))
# Points are jittered horizontally only and coloured by whether the reading
# is below 32; the boxplots are drawn semi-transparent on top of them.
l2 = l1 +
  geom_jitter(position = position_jitter(width = 0.15, height = 0),
              alpha = 0.1, aes(colour = (Temp < 32))) +
  scale_colour_manual(values = c("#ff9999", "#9999ff")) +
  geom_boxplot(fill = "#dddddd", alpha = 0.5,
               outlier.size = 1, outlier.colour = "#BBBBBB",
               colour = "#666666") +
  scale_y_continuous(name = "Temperature (°F)") +
  scale_x_discrete(name = "Hour of day (EST)") +
  ggtitle("Waterville Meteorological Data Summary for 2010") +
  theme(legend.position = "none")

# Plot hourly temperatures by month: the hourly boxplot/jitter figure split
# into one panel per month, three panels per row.
# Prepend the hour column to the month-augmented table
met.hour.month = cbind(hour, met.month)
#
# hour is numeric, so it is converted to a factor to get one box per hour
m1 = ggplot(met.hour.month, aes(as.factor(hour), Temp))
# Points are jittered horizontally only and coloured by whether the reading
# is below 32. Only hours 6, 12 and 18 are labelled on the x axis; the breaks
# are given as character strings so they match the factor levels of the
# discrete axis.
m2 = m1 +
  geom_jitter(position = position_jitter(width = 0.15, height = 0), size = 1,
              alpha = 0.7, aes(colour = (Temp < 32))) +
  scale_colour_manual(values = c("#ff9999", "#9999ff")) +
  facet_wrap(~Month, ncol = 3) +
  geom_boxplot(fill = "#dddddd", alpha = 0.5,
               outlier.size = 0, outlier.colour = "#BBBBBB",
               colour = "#666666") +
  scale_y_continuous(name = "Temperature (°F)") +
  scale_x_discrete(name = "Hour of day (EST)",
                   breaks = c("6", "12", "18"),
                   labels = c("6", "12", "18")) +
  ggtitle("Waterville Meteorological Data Summary for 2010") +
  theme(legend.position = "none")

# Write the three figures to PNG files. The monthly and hourly summaries are
# saved 11 wide by 8 high; the month-faceted figure is saved 8 wide by 11 high.
ggsave(filename = "Waterville_month_Temp.png",
       plot = p2, height = 8, width = 11)
ggsave(filename = "Waterville_hourly_Temp.png",
       plot = l2, height = 8, width = 11)
ggsave(filename = "Waterville_time_month_Temp.png",
       plot = m2, height = 11, width = 8)