library('data.table') library('ncdf4') library('zoo') library('reshape2') library('plyr') library('HiddenMarkov') library('leaps') library('ggplot2') library('locfit') source('lib.R') max_state <- 3 compare_state <- 2 nsims <- 200 x <- read.table("data/lf-annual-wy-af.txt") x <- ts(x[,2],start=x[1,1])/10^6 xdt <- data.table(date=as.vector(time(x)),x=as.vector(x)) np <- length(x) x.5yr <- rollapply(x,5,mean,align='left') climate_m <- as.data.table(read.csv('data/winter_climate_indicies.csv')) climate <- as.data.table(dcast(climate_m,date~variable)) options(warn=-1) pars <- get.named.parlist(x, compare_state, 'gamma', 'same.sd') hmm <- dthmm(x, Pi_init(compare_state), delta_init(compare_state), 'gamma', pars) sink('/dev/null') hmm <- BaumWelch(hmm) sink() options(warn=2) # The HMM model will randomly switch which number it assigns to a state (1 or 2) # but the decoding will always be the same shape # 1 - low state # 2 - high state decoding <- data.table(state=Viterbi(hmm),date=seq(1906,by=1,len=np)) if(decoding$state[1] == 1){ decoding[state==1]$state <- 99L decoding[state==2]$state <- 1L decoding[state==99]$state <- 2L } decoding_lag <- lagged_dt(decoding,3) xdt_lag <- lagged_dt(xdt,3) dt <- merge(climate,xdt_lag,by='date') dt <- merge(dt,decoding_lag,by='date') fitglm = glm(state-1 ~ state_lag1+state_lag2+state_lag3, data=dt, family=binomial()) bestmodel = fitglm sink('/dev/null') bestmodel = stepAIC(fitglm) sink() predictions <- data.table(date=integer(0), state=integer(0), statep=numeric(0)) ensembles <- data.table(date=integer(0), value=numeric(0)) pb <- txtProgressBar(1,nrow(dt),style=3) for(j in 1:nrow(dt)){ setTxtProgressBar(pb,j) # predict the probabilities for this year statepred = predict(bestmodel,dt[j],type="response") lf1 <- locfit(x ~ amo+pdo+enso+pdsi, dt[-j][state==1], deg=1, alpha=.8) lf2 <- locfit(x ~ amo+pdo+enso+pdsi, dt[-j][state==2], deg=1, alpha=.8) lf <- locfit(x ~ amo+pdo+enso+state+state_lag1, dt[-j], deg=1, alpha=.95) # the way thigs are set up, statepred is the probability of being in state 2 r1 <- round(1-statepred) r2 <- round(statepred) #ensemble <- c(predict_plus_se_sample(lf1,dt[j],round(nsims*r1)), # predict_plus_se_sample(lf2,dt[j],round(nsims*r2))) ensemble <- predict_plus_se_sample(lf,dt[j],nsims) ensembles <- rbind(ensembles, data.table(date=dt[j,date], value=ensemble) ) predictions <- rbind(predictions, data.table( date=dt[j,date], state=dt[j,state], statep=statepred)) } close(pb) predictions$state=factor(predictions$state) p <- ggplot(dt)+ geom_bar(data=predictions,aes(x=date,y=statep,fill=state),stat='identity')+ theme_bw() print(p) p2 <- ggplot(dt)+ geom_boxplot(data=ensembles,aes(x=date,y=value,group=date),outlier.colour=NA)+ geom_line(aes(x=date,y=x))+ theme_bw()+coord_cartesian(ylim=c(0,30)) print(p2)