library('data.table')
library('ncdf4')
library('zoo')
library('reshape2')
library('plyr')
library('HiddenMarkov')
library('leaps')
library('ggplot2')
library('locfit')

source('lib.R')

max_state <- 3
compare_state <- 2
nsims <- 200

x <- read.table("data/lf-annual-wy-af.txt") 
x <- ts(x[,2],start=x[1,1])/10^6
xdt <- data.table(date=as.vector(time(x)),x=as.vector(x))
np <- length(x)

x.5yr <- rollapply(x,5,mean,align='left')


climate_m <- as.data.table(read.csv('data/winter_climate_indicies.csv'))
climate <- as.data.table(dcast(climate_m,date~variable))

options(warn=-1)
pars <- get.named.parlist(x, compare_state, 'gamma', 'same.sd')
hmm <- dthmm(x, Pi_init(compare_state), delta_init(compare_state), 'gamma', pars)
sink('/dev/null')
hmm <- BaumWelch(hmm)
sink()
options(warn=2)

# The HMM model will randomly switch which number it assigns to a state (1 or 2)
# but the decoding will always be the same shape
# 1 - low state 
# 2 - high state
decoding <- data.table(state=Viterbi(hmm),date=seq(1906,by=1,len=np))
if(decoding$state[1] == 1){
    decoding[state==1]$state <- 99L
    decoding[state==2]$state <- 1L
    decoding[state==99]$state <- 2L
}


decoding_lag <- lagged_dt(decoding,3)
xdt_lag <- lagged_dt(xdt,3)

dt <- merge(climate,xdt_lag,by='date')
dt <- merge(dt,decoding_lag,by='date')

fitglm = glm(state-1 ~ state_lag1+state_lag2+state_lag3, data=dt, family=binomial())

bestmodel = fitglm
sink('/dev/null')
bestmodel = stepAIC(fitglm)
sink()

predictions <- data.table(date=integer(0), state=integer(0), statep=numeric(0))
ensembles <- data.table(date=integer(0), value=numeric(0))

pb <- txtProgressBar(1,nrow(dt),style=3)
for(j in 1:nrow(dt)){
    setTxtProgressBar(pb,j)

    # predict the probabilities for this year
    statepred = predict(bestmodel,dt[j],type="response")

    lf1 <- locfit(x ~ amo+pdo+enso+pdsi, dt[-j][state==1], deg=1, alpha=.8)
    lf2 <- locfit(x ~ amo+pdo+enso+pdsi, dt[-j][state==2], deg=1, alpha=.8)
    lf <- locfit(x ~ amo+pdo+enso+state+state_lag1, dt[-j], deg=1, alpha=.95)

    # the way thigs are set up, statepred is the probability of being in state 2
    r1 <- round(1-statepred)
    r2 <- round(statepred)

    #ensemble <- c(predict_plus_se_sample(lf1,dt[j],round(nsims*r1)),
    #              predict_plus_se_sample(lf2,dt[j],round(nsims*r2)))
    ensemble <- predict_plus_se_sample(lf,dt[j],nsims)
    ensembles <- rbind(ensembles,
        data.table(date=dt[j,date], value=ensemble)
    )

    predictions <- rbind(predictions, data.table(
        date=dt[j,date], state=dt[j,state], statep=statepred))

}
close(pb)

predictions$state=factor(predictions$state)

p <- ggplot(dt)+
    geom_bar(data=predictions,aes(x=date,y=statep,fill=state),stat='identity')+
    theme_bw()
print(p)

p2 <- ggplot(dt)+
    geom_boxplot(data=ensembles,aes(x=date,y=value,group=date),outlier.colour=NA)+
    geom_line(aes(x=date,y=x))+
    theme_bw()+coord_cartesian(ylim=c(0,30))
print(p2)