##### Lesson 40: Discrete distributions in R - Part II ##### # Setting the working directory setwd(“provide the path to your folder“) list.files() ######################### Real Data ######################### # Read the temperature data file # temperature_data = read.table("temperature_data_CentralPark.txt",header=T) # Years # years = unique(temperature_data$Year) # Identifying unique years in the data nyrs = length(years) # number of years of data # November Coldest Day # november_coldtemp = matrix(NA,nrow=nyrs,ncol=1) for (i in 1:nyrs) { days = which((temperature_data$Year==years[i]) & (temperature_data$Month==11)) # index to find november days in each year november_coldtemp[i,1] = min(temperature_data$TAVG[days]) # computing the minimum of these days } # Plot the time series # plot(years, november_coldtemp,type="o") # There is trend in the data # # Take a subset of data from recent years to avoid issues with trend (for now)-- # # 1982 - 2016 november_recent_coldtemp = november_coldtemp[114:148] plot(1982:2016,november_recent_coldtemp,type="o") abline(h=30) ######################### GEOMETRIC DISTRIBUTION ######################### # The real data case # n = length(november_recent_coldtemp) cold_years = which(november_recent_coldtemp <= 30) ncold = length(cold_years) p = ncold/n x = 0:n px = dgeom(x,p) plot((x+1),px,type="h",xlab="Random Variable X (Cold on kth year)",ylab="Probability P(X=k)",font=2,font.lab=2) abline(v = (1/p),col="red",lwd=2) txt1 = paste("probability of cold year (p) = ",round(p,2),sep="") txt2 = paste("Return period of cold years = E[X] = 1/p ~ 3.5 years",sep="") text(20,0.2,txt1,col="red",cex=1) text(20,0.1,txt2,col="red",cex=1) pgeom(5,p) ######## Animation (varying p) ######### # Create png files for Geometric distribution # png(file="geometric%02d.png", width=600, height=300) n = 35 # to mimic the sample size for november cold x = 0:n p = 0.1 for (i in 1:5) { px = dgeom(x,p) plot(x,px,type="h",xlab="Random Variable X (First Success on kth trial)",ylab="Probability P(X=k)",font=2,font.lab=2) txt = paste("p=",p,sep="") text(20,0.04,txt,col="red",cex=2) p = p+0.2 } dev.off() # Combine the png files saved in the folder into a GIF # library(magick) geometric_png1 <- image_read("geometric01.png","x150") geometric_png2 <- image_read("geometric02.png","x150") geometric_png3 <- image_read("geometric03.png","x150") geometric_png4 <- image_read("geometric04.png","x150") geometric_png5 <- image_read("geometric05.png","x150") frames <- image_morph(c(geometric_png1, geometric_png2, geometric_png3, geometric_png4, geometric_png5), frames = 15) animation <- image_animate(frames) image_write(animation, "geometric.gif") ######################### Negative Binomial DISTRIBUTION ######################### require(combinat) comb = function(n, x) { return(factorial(n) / (factorial(x) * factorial(n-x))) } # The real data case # n = length(november_recent_coldtemp) cold_years = which(november_recent_coldtemp <= 30) ncold = length(cold_years) p = ncold/n r = 3 # third cold year x = r:n px = NA for (i in r:n) { dum = comb((i-1),(r-1))*p^r*(1-p)^(i-r) px = c(px,dum) } px = px[2:length(px)] plot(x,px,type="h",xlab="Random Variable X (Third Cold year on kth trial)",ylab="Probability P(X=k)",font=2,font.lab=2) ######################### Poisson DISTRIBUTION ######################### # rearrange the data into columns of 5 years # data_rearrange = array(november_recent_coldtemp,c(5,7)) # count the number of years in each column with temp < 30 counts = apply(data_rearrange,2,function(x) length(which(x <= 30))) plot(0:5,dpois(0:5,mean(counts)),type="h",xlab="Random Variable X (Cold events in 5 years)",ylab="Probability P(X=k)",font=2,font.lab=2)