library('RSQLite') library('ggplot2') library('data.table') library('reshape2') library('plyr') library('e1071') library('sm') library('tikzDevice') library('HiddenMarkov') library('zoo') library('ggthemes') library('scales') library('dplyr') source('lib.R') # This file is the last step in the HMM simulation analysis # it relies on tables that are created by other scripts # exiting in the simulation database. dbname <- 'hmm_sim.db' m <- dbDriver('SQLite') con <- dbConnect(m, dbname = dbname) # this table created by 'hmm-sim.R' simulations <- as.data.table(dbReadTable(con,'simulations')) dens <- as.data.table(dbReadTable(con,'simulation_pdfs')) # this table is generated in 'hmm-input-data.R' dt <- as.data.table(dbReadTable(con,'input_data')) # these two table are created in 'hmm-sim-wavelet.R' local_spec_med <- as.data.table(dbReadTable(con, 'wavelet_sim_median')) spec_bounds <- as.data.table(dbReadTable(con, 'wavelet_sim_sawp_bounds')) spec_obs <- wavelet_dt(dt$x,timelab=dt$date) obs_scale <- wavelet_scale(dt$x) obs_power <- wavelet_power(dt$x) coi <- wavelet_default(dt$x)$coi[-(1:2)] coi_time <- dt$date[-(1:2)] plot_dir <- '../hmm-sim-paper/plots/' options(warn=-1) pars <- get.named.parlist(dt$x, 2, 'gamma', 'same.sd') hmm <- dthmm(dt$x, Pi_init(2), delta_init(2), 'gamma', pars) sink('/dev/null') hmm <- BaumWelch(hmm) sink() #tikz(file.path(plot_dir,'sim-lengths-cum-mean.tex'),width=pc2in(21),height=pc2in(22),pointsize=10) #cumsum.after <- 5 #max.plot <- 14 #runs <- rl.count(as.vector(x),ns) #mean.counts <- rbind( # apply(ar.1.stats$counts,2,mean), # apply(hmm.2.stats$counts,2,mean) # #c(runs$count,rep(0,count.max-length(runs$count))) # ) #colnames(mean.counts) <- 1:count.max #rownames(mean.counts) <- c('AR1','HM2G')#, 'Obs')#, 'NHM') #mean.counts.pdf <- mean.counts[,(cumsum.after + 1):max.plot] #mean.counts.cdf <- t(apply(mean.counts[,!(colnames(mean.counts) %in% 1:(cumsum.after))],1,cumsum))[,1:(max.plot-cumsum.after)] #colnames(mean.counts.cdf) <- (cumsum.after + 1):max.plot #layout(cbind(1:2)) #par(mar=c(3,4,1,1)+.1) #barplot(mean.counts.pdf,beside=T,legend=T,xlab='Run Length',ylab='Mean Count', # ylim=c(0,max(mean.counts.pdf+.051))) #barplot(mean.counts.cdf,beside=T,xlab='Run Length',ylab='Mean Count', # ylim=c(0,max(mean.counts.cdf+.051)))#c('AR1','HM2G','HM2N'), # #args.legend=list(x='bottomright',bg='white')) #mtext('Run Length', 1,2) #dev.off() rl_obs <- rl.count(dt$x,2) obs_rl <- data.frame(x=rl_obs$len[-1],y=rl_obs$count[-1]) rl = dplyr::summarise(group_by(simulations[kind=='HMC'],kind,sim), rl=rl.count(value,2)$count[-1],n=rl.count(value,2)$len[-1]) p_rl <- ggplot(rl[n<8])+ geom_boxplot(aes(x=factor(n),y=rl),outlier.colour=NA)+ theme_bw()+ geom_point(data=obs_rl, aes(x=x-1,y=y), size=3)+ geom_line(data=obs_rl, aes(x=x-1,y=y))+ xlab('Run Length')+ylab('Number of Runs')+ theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) ggsave_tikz(file.path(plot_dir,'sim-run-length.tex'),width=pc2in(20),height=pc2in(15),pointsize=10) #tikz(file.path(plot_dir,paste(dataset,dist,'sim-lengths-density.tex',sep='-')),width=pc2in(20),height=pc2in(20),pointsize=10) # upper.hmm <- apply(hmm.2.stats$dens,2,quantile,.99) # lower.hmm <- apply(hmm.2.stats$dens,2,quantile,.01) # med.hmm <- apply(hmm.2.stats$dens,2,quantile,.5) # # d.hmm.sim <- density(hmm.2.stats$runs[1,],bw=1,from=1,to=count.max,na.rm=T) # d.hist <- density(rl.count(as.vector(meko),3)$rl,bw=1,from=1,to=13) # hxp <- d.hmm.sim$x # # plot(hxp,upper.hmm,xlim=c(1,10),type='n',xlab='Run Lengths',ylab='Probability Density') # grid(lty=1,lwd=.5,col=gray(.9)) # polygon(c(hxp,rev(d.hmm.sim$x)),c(upper.hmm,rev(lower.hmm)),col='lightgray',border=NA) # lines(d.hist,col='red') # legend('topright',c('HM2G','Historical'),lty=c(0,1,1), # col=c('lightgray','red'),pch=c(22,-1,-1),pt.bg='lightgray',bg='white') #dev.off() printf('Simulation Statistics...') stats <- as.data.table(melt(ddply(simulations, .(sim,kind), summarise, Mean=mean(value), Minimum=min(value), Maximum=max(value), `Standard Deviation`=sd(value), Skewness=skewness(value), `Lag 1 AC`=lag1(value)), id.vars=c('sim','kind'))) obs <- obs_stats(dt$x) limits <- ddply(stats,.(kind,variable),summarise, value=quantile(value,c(.05,.25,.5,.75,.95)), limit=c('ymin','lower','middle','upper','ymax')) limits <- as.data.table(dcast(limits,kind+variable~limit)) p_stats <- ggplot(limits[kind!='AR'&kind!='HMI'],aes(ymin=ymin,lower=lower,middle=middle,upper=upper,ymax=ymax))+ geom_boxplot(aes(x=kind,group=kind),stat='identity',outlier.colour=NA,width=.8)+ facet_wrap(~variable,scales='free_y')+ geom_hline(data=obs,aes(yintercept=value,group=variable),linetype=2)+ theme_bw()+ xlab('')+ylab('')+ theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) ggsave_tikz(file.path(plot_dir,'sim-stats.tex'),width=pc2in(39),height=pc2in(24.4),pointsize=10) ###################################################################################################### ###################################################################################################### printf('Simulation PDF...') evalp <- seq(0,30,.5) d <- sm.density(dt$x, eval.points=evalp,display='none')$estimate limitsd <- ddply(dens[kind=='HMC'],.(x),summarise, value=fivenum(density), limit=c('ymin','lower','middle','upper','ymax')) limitsd <- dcast(limitsd,x~limit) p_pdf <- ggplot(limitsd,aes(ymin=ymin,lower=lower,middle=middle,upper=upper,ymax=ymax))+ geom_boxplot(aes(x=x,group=x),stat='identity',outlier.colour=NA,width=.4)+ geom_line(aes(x=evalp,y=d),col='#0072B2',size=1.5)+ theme_bw()+ xlab('Flow Volume [MAF]')+ylab('Probability Density') + theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) #ggplot(dens[kind=='climate'])+ # geom_boxplot(aes(x=x,y=density,group=x),outlier.colour=NA,width=.4)+ # geom_line(aes(x=evalp,y=d),col='#0072B2',size=1.5)+ # theme_bw()+ # xlab('Flow Volume [MAF]')+ylab('Probability Density') ggsave_tikz(file.path(plot_dir,'sim-density.tex'),p_pdf,width=pc2in(20),height=pc2in(15),pointsize=10) ###################################################################################################### ###################################################################################################### printf('HMM Decoding...') p1_decoding <- ggplot()+ geom_bar(aes(x=dt$date,y=dt$state),width=.5,stat='identity')+ xlab('Time')+ylab('State')+ theme_bw()+ coord_cartesian(ylim=c(.95,2.05)) + scale_y_continuous(breaks=seq(1,2,1)) + theme(panel.grid.minor=element_blank(), panel.grid.major=element_blank()) windows = data.frame(x1=c(1906,1930,1982,1987),x2=c(1929,1981,1986,2009)) ys = data.frame(y1=numeric(nrow(windows)),y2=numeric(nrow(windows))) for(i in 1:nrow(windows)) ys$y1[i] = ys$y2[i] = mean(dt[date>=windows[i,1] & date