##### Lesson 5: Let us 'R'eview #####
#
# Descriptive statistics and plots for the consumption column of
# "Natural_Gas_Consumption_by_ZIP_Code_-_2010.csv", split by building type.
# Requires the third-party package `moments` (for skewness()).

library(moments)

# Setting the working directory ----
# Replace the placeholder with the folder that contains the CSV file.
# (Plain ASCII quotes are required; typographic quotes are a parse error.)
setwd("provide the path to your folder")
list.files()

# Read the datafile ----
naturalgas_data <- read.csv(
  "Natural_Gas_Consumption_by_ZIP_Code_-_2010.csv",
  header = TRUE
)

# Extract subset of the data ----
# Row indices for each building type / service class; which() drops rows
# where the comparison is NA.
index1 <- which(naturalgas_data$Building.type..service.class == "Large residential")
index2 <- which(naturalgas_data$Building.type..service.class == "Commercial")
index3 <- which(naturalgas_data$Building.type..service.class == "Institutional")

Residential <- naturalgas_data$Consumption..GJ.[index1]
Commercial <- naturalgas_data$Consumption..GJ.[index2]
Institutional <- naturalgas_data$Consumption..GJ.[index3]

# Order statistics and plots ----
# Strip chart of the raw values with a horizontal boxplot drawn on top of it.
stripchart(
  Residential,
  font = 2, pch = 21, cex = 1,
  xlab = "Consumption in GJ", font.lab = 2
)
boxplot(Residential, horizontal = TRUE, col = "grey", add = TRUE)

# Histogram ----
hist(
  Residential,
  font = 2, font.lab = 2,
  xlab = "Consumption in GJ", col = "grey"
)

# Summary statistics ----

# average
mean(Residential, na.rm = TRUE)

# median (computed two equivalent ways: median() and the 50% quantile)
median(Residential, na.rm = TRUE)
quantile(Residential, probs = 0.5, na.rm = TRUE)

# variance and standard deviation
var(Residential, na.rm = TRUE)
sd(Residential, na.rm = TRUE)

# skewness (from the `moments` package)
skewness(Residential, na.rm = TRUE)

# quantiles and summary
quantile(Residential, na.rm = TRUE)
summary(Residential)

# Comparing the data ----

# Pad a vector with trailing NAs up to length n.
pad_with_na <- function(x, n) {
  length(x) <- n
  x
}

# The three subsets need not have the same number of rows. A plain cbind()
# would recycle the shorter vectors and silently duplicate observations, so
# pad each one with NA to a common length first; boxplot() ignores NAs.
# If the lengths already match, the padding is a no-op.
n_max <- max(length(Residential), length(Commercial), length(Institutional))
all_data <- cbind(
  Residential = pad_with_na(Residential, n_max),
  Commercial = pad_with_na(Commercial, n_max),
  Institutional = pad_with_na(Institutional, n_max)
)
boxplot(all_data, horizontal = TRUE)

# Coefficient of variation in percent: (sd / mean) * 100, ignoring NAs.
coef_var_pct <- function(x) {
  (sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)) * 100
}

cv_residential <- coef_var_pct(Residential)
cv_commercial <- coef_var_pct(Commercial)
cv_institutional <- coef_var_pct(Institutional)