############################################################
############################################################
# myboxplot.matrix:
# Modified version of the boxplot.matrix function to call myboxplot,
# which in turn calls myboxplot.stats to use the 5th and 95th percentiles

myboxplot.matrix <- function (x, use.cols = TRUE, ...)
{
    groups <- if (use.cols)
        split(x, rep.int(1L:ncol(x), rep.int(nrow(x), ncol(x))))
    else split(x, seq(nrow(x)))
    if (length(nam <- dimnames(x)[[1 + use.cols]]))
        names(groups) <- nam
    invisible(myboxplot(groups, ...))
}

############################################################
############################################################
# myboxplot:
# Modified version of the boxplot function which calls myboxplot.stats for
# 5th and 95th percentiles instead of the default 1.5*IQR

myboxplot <- function (x, ..., range = 1.5, width = NULL, varwidth = FALSE,
    notch = FALSE, outline = TRUE, names, plot = TRUE, border = par("fg"),
    col = NULL, log = "", pars = list(boxwex = 0.8, staplewex = 0.5,
        outwex = 0.5), horizontal = FALSE, add = FALSE, at = NULL)
{
    args <- list(x, ...)
    namedargs <- if (!is.null(attributes(args)$names))
        attributes(args)$names != ""
    else rep(FALSE, length.out = length(args))
    groups <- if (is.list(x)) x else args[!namedargs]
    if (0L == (n <- length(groups)))
        stop("invalid first argument")
    if (length(class(groups)))
        groups <- unclass(groups)
    if (!missing(names))
        attr(groups, "names") <- names
    else {
        if (is.null(attr(groups, "names")))
            attr(groups, "names") <- 1L:n
        names <- attr(groups, "names")
    }
    cls <- sapply(groups, function(x) class(x)[1L])
    cl <- if (all(cls == cls[1L])) cls[1L] else NULL
    for (i in 1L:n)
        groups[i] <- list(myboxplot.stats(unclass(groups[[i]]), range))
    stats <- matrix(0, nrow = 5L, ncol = n)
    conf <- matrix(0, nrow = 2L, ncol = n)
    ng <- out <- group <- numeric(0L)
    ct <- 1
    for (i in groups) {
        stats[, ct] <- i$stats
        conf[, ct] <- i$conf
        ng <- c(ng, i$n)
        if ((lo <- length(i$out))) {
            out <- c(out, i$out)
            group <- c(group, rep.int(ct, lo))
        }
        ct <- ct + 1
    }
    if (length(cl) && cl != "numeric")
        oldClass(stats) <- cl
    z <- list(stats = stats, n = ng, conf = conf, out = out,
        group = group, names = names)
    if (plot) {
        if (is.null(pars$boxfill) && is.null(args$boxfill))
            pars$boxfill <- col
        do.call("bxp", c(list(z, notch = notch, width = width,
            varwidth = varwidth, log = log, border = border,
            pars = pars, outline = outline, horizontal = horizontal,
            add = add, at = at), args[namedargs]))
        invisible(z)
    }
    else z
}

# This function replaces boxplot.stats --- it uses the middle 50% and
# middle 90% instead of the middle 50% (IQR) and 1.5*IQR.
myboxplot.stats <- function (x, coef = NULL, do.conf = TRUE, do.out = TRUE)
{
    nna <- !is.na(x)
    n <- sum(nna)
    stats <- quantile(x, c(.05, .25, .5, .75, .95), na.rm = TRUE)
    iqr <- diff(stats[c(2, 4)])
    out <- x < stats[1] | x > stats[5]
    conf <- if (do.conf)
        stats[3] + c(-1.58, 1.58) * diff(stats[c(2, 4)])/sqrt(n)
    list(stats = stats, n = n, conf = conf, out = x[out & nna])
}
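## Ex. (a minimal usage sketch; the data are made up -- the whiskers fall
## at the 5th/95th percentiles instead of 1.5*IQR):
##
##   set.seed(1)
##   myboxplot(split(rnorm(300), rep(1:3, 100)))
##   myboxplot.matrix(matrix(rnorm(300), ncol = 3))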
laglead <- function(x,q,r){
    ## laglead:
    ## This function gives all the chunks of a vector of length q
    ## followed by chunks of length r. It gives back a list of two
    ## matrices, lag and lead, essentially the lag q matrix
    ## and the subsequent r values.
    ##
    ## Inputs:
    ## x - A vector
    ## q - lag length
    ## r - lead length
    ##
    ## Ex.
    ##
    ## laglead(1:10,3,2)
    ## $lag
    ##      [,1] [,2] [,3]
    ## [1,]    1    2    3
    ## [2,]    2    3    4
    ## [3,]    3    4    5
    ## [4,]    4    5    6
    ## [5,]    5    6    7
    ## [6,]    6    7    8
    ##
    ## $lead
    ##      [,1] [,2]
    ## [1,]    4    5
    ## [2,]    5    6
    ## [3,]    6    7
    ## [4,]    7    8
    ## [5,]    8    9
    ## [6,]    9   10

    n <- length(x)
    lag <- matrix(NA, n-q-r+1, q)
    lead <- matrix(NA, n-q-r+1, r)
    for(i in 1:(n-q-r+1)){
        lag[i,] <- x[i:(i+q-1)]
        lead[i,] <- x[(i+q):(i+q+r-1)]
    }
    return(list(lag=lag, lead=lead))
}

ts.annual.mean <- function(x){
    options(warn=-1)
    nyears <- length(unique(floor(time(x))))
    syear <- start(x)[1]
    nst = vector('numeric', nyears)
    for(i in 1:nyears)
        nst[i] <- mean(window(x, syear - 1 + i, c(syear - 1 + i, frequency(x))))
    nst <- ts(nst, start = c(syear,1), frequency = 1)
    return(nst)
}

ts.seasonal.mean <- function(x, s=1, e=frequency(x), include.incomplete=FALSE){
    options(warn=-1)
    nyears <- length(unique(floor(time(x))))
    ind <- 1:nyears
    if(start(x)[2] > e) ind <- ind[-1]
    if(end(x)[2] < s) ind <- ind[-length(ind)]
    syear <- start(x)[1]
    eyear <- end(x)[1]
    years <- (syear:eyear)[ind]
    nst <- vector('numeric', length(ind))
    interyear <- ifelse(e < s, TRUE, FALSE)
    len <- ifelse(e < s, e+frequency(x)-s+1, (e-s+1))
    #first.year.complete <- length(which(floor(time(x)) == syear)) == frequency(x)
    #last.year.complete <- length(which(floor(time(x)) == eyear)) == frequency(x)
    for(i in ind){
        #browser()
        this.season <- window(x, c(syear - 1 + i, s),
                              c(syear + ifelse(interyear, 0, -1) + i, e))
        nst[i] <- ifelse(length(this.season) == len, mean(this.season), NA)
    }
    nst <- ts(nst, start = c(years[1],1), frequency = 1)
    return(nst)
}

ts.next.time <- function(x){
    en <- end(x)
    en <- if(en[2] == frequency(x)) c(en[1] + 1, 1) else c(en[1], en[2] + 1)
    return(en)
}

ts.order.start <- function(x, start.per){
    f = frequency(x)
    ord <- if(start.per == 1) start.per:f
           else if(start.per == f) c(f, 1:(f - 1))
           else c(start.per:f, 1:(start.per - 1))
    return(ord)
}

ts.sim.and.calc <- function(x, order, annual.mean = TRUE, meanvec=NULL, nsims = 10000){
    x.fit <- arima(x, order=order)
    x.sim <- arima.sim(list(ar = coef(x.fit)), nsims, n.start = 100,
                       sd = sqrt(x.fit$sigma2))
    x.sim <- ts(x.sim, start = ts.next.time(x), frequency = frequency(x))
    if(annual.mean)
        x.sim <- ts.annual.mean(x.sim + meanvec)
    return(x.sim)
}

wapply <- function(x, fun, win.length = length(x), ...){
    ## wapply:
    ##
    ## applies a function to each successive window of a time series;
    ## good for things like annual means from monthly values.
    ## note: does not return a ts object. Each column of a matrix is
    ## processed separately via wapply.single.
    ##
    ## Inputs:
    ##
    ## x          - A time series (or a matrix with one series per column)
    ## fun        - function to apply
    ## win.length - length of window to apply fun to, defaults to length(x)
    ##
    ## Ex.
    ##
    ## wapply(ts(rnorm(12*10),frequency = 12),mean,12)
    ##  [1] -0.10910749  0.20212615  0.89948987 -0.27514597  0.18479893  0.55860823
    ##  [7]  0.38323747  0.08271312  0.26844408  0.03115793
    ##
    if(is.null(ncol(x))){
        return(wapply.single(x, fun, win.length, ...))
    }else{
        for(i in 1:ncol(x)){
            if(i == 1){
                z <- wapply.single(x[,1], fun, win.length, ...)
            }else{
                z <- cbind(z, wapply.single(x[,i], fun, win.length, ...))
            }
        }
        attributes(z)$dimnames[[2]] <- attributes(x)$dimnames[[2]]
        return(z)
    }
}

wapply.single <- function(x, fun, win.len = length(x), ...){
    r <- (length(x) %% win.len)
    if(r > 0) x <- x[1:(length(x)-r)]
    stack <- matrix(x, nrow = win.len)
    return(apply(stack, 2, fun, ...))
}

stack.ts <- function(x){
    matrix(pad.ts(x), ncol = frequency(x), byrow = T)
}

pad.ts <- function(x){
    ts(c(rep(NA, start(x)[2]-1), x, rep(NA, frequency(x)-end(x)[2])),
       start = c(start(x)[1], 1), frequency = frequency(x))
}
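## Ex. (annual and seasonal means from a monthly series; made-up data):
##
##   set.seed(1)
##   x <- ts(rnorm(120), start = c(1990, 1), frequency = 12)
##   ts.annual.mean(x)                   # one value per calendar year
##   ts.seasonal.mean(x, s = 4, e = 9)   # April-September means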
fcontour <- function (x = seq(0, 1, length.out = nrow(z)),
    y = seq(0, 1, length.out = ncol(z)), z, left = TRUE,
    xlim = range(x, finite = TRUE), ylim = range(y, finite = TRUE),
    zlim = range(z, finite = TRUE), levels = pretty(zlim, nlevels),
    nlevels = 20, color.palette = terrain.colors,
    col = color.palette(length(levels) - 1), plot.title, plot.axes,
    xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes, ...)
{
    if (missing(z)) {
        if (!missing(x)) {
            if (is.list(x)) {
                z <- x$z
                y <- x$y
                x <- x$x
            }
            else {
                z <- x
                x <- seq.int(0, 1, length.out = nrow(z))
            }
        }
        else stop("no 'z' matrix specified")
    }
    else if (is.list(x)) {
        y <- x$y
        x <- x$x
    }
    if (any(diff(x) <= 0) || any(diff(y) <= 0))
        stop("increasing 'x' and 'y' values expected")
    mar.orig <- (par.orig <- par(c("mar", "las")))$mar
    par(las = las)
    mar <- mar.orig
    on.exit(par(par.orig))
    if (left){
        mar[4] <- 1
    }
    else{
        mar[2] <- 1
        mar[4] <- 4.1
    }
    par(mar = mar)
    plot.new()
    plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs)
    if (!is.matrix(z) || nrow(z) <= 1 || ncol(z) <= 1)
        stop("no proper 'z' matrix specified")
    if (!is.double(z))
        storage.mode(z) <- "double"
    .Internal(filledcontour(as.double(x), as.double(y), z,
        as.double(levels), col = col))
    if (missing(plot.axes)) {
        if (axes && left) {
            title(main = "", xlab = "", ylab = "")
            Axis(x, side = 1)
            Axis(y, side = 2)
        }
        else if (axes){
            title(main = "", xlab = "", ylab = "")
            Axis(x, side = 1)
            Axis(y, side = 4)
        }
    }
    else plot.axes
    if (frame.plot)
        box()
    if (missing(plot.title))
        title(...)
    else plot.title
    invisible()
}

cbar <- function (nlevels, zlim, rotate = TRUE, levels = pretty(zlim, nlevels),
    color.palette = terrain.colors, col = color.palette(length(levels) - 1),
    key.title, key.axes, xaxs = "i", yaxs = "i", las = 1, axes = TRUE)
{
    mar.orig <- (par.orig <- par(c("mar", "las")))$mar
    par(las = las)
    mar <- mar.orig
    on.exit(par(par.orig))
    plot.new()
    if(rotate){
        mar[4] <- 2
        mar[2] <- 2
        par(mar = mar)
        plot.window(xlim = c(0, 1), ylim = range(levels), xaxs = "i", yaxs = "i")
        rect(0, levels[-length(levels)], 1, levels[-1], col = col)
        if (missing(key.axes)) {
            if (axes)
                axis(4)
        }
        else key.axes
    }
    else {
        mar[1] <- mar[3]
        mar[3] <- 2
        par(mar = mar)
        plot.window(xlim = range(levels), ylim = c(0, 1), xaxs = "i", yaxs = "i")
        rect(levels[-length(levels)], 0, levels[-1], 1, col = col)
        if (missing(key.axes)) {
            if (axes)
                axis(1)
        }
        else key.axes
    }
    box()
    if (!missing(key.title))
        key.title
}

sizer <- function(rotate = TRUE){
    mar <- par("mar")
    if(rotate){
        w <- (3 + mar[2]) * par("csi") * 2.54
    }
    else{
        w <- (3 + mar[3]) * par("csi") * 2.54
    }
    return(w)
}

binary.ts <- function(x, limit = median(x), tie = 1){
    # returns a binary representation of x, 1 above limit, 0 below
    #
    # Ex.
    #
    # binary.ts(1:10)
    ## [1] 0 0 0 0 0 1 1 1 1 1
    # binary.ts(1:10,8)
    ## [1] 0 0 0 0 0 0 0 1 1 1
    # binary.ts(1:10,8,tie=0)
    ## [1] 0 0 0 0 0 0 0 0 1 1
    b <- integer(length(x))
    filter <- if(tie == 1) x >= limit else x > limit
    # only need to set the 1's because b is already 0's
    b[filter] <- 1L
    if(class(x) == 'ts')
        return(ts(b, start = start(x), end = end(x)))
    else return(b)
}

ntile.ts <- function(x, n, limit.type = 'prob', tie = 1, altobs = NULL){
    # returns an integer vector corresponding to n states (labeled 0 to n-1)
    # broken by equal probability or equal distance
    limit <- if(limit.type == 'prob') quantile(x, seq(0, 1, 1/n))
             else if(limit.type == 'equal') seq(min(x), max(x), by = diff(range(x))/n)
    if(!is.null(altobs)) limit <- quantile(altobs, seq(0, 1, 1/n))
    b <- integer(length(x))
    # limit has n+1 break points, so loop over the n bins
    for(i in 1:n){
        filter <- if(tie == 1) x >= limit[i] & x <= limit[i+1]
                  else x > limit[i] & x <= limit[i+1]
        # only need to set the 1's because b is already 0's
        b[filter] <- as.integer(i-1)
    }
    if(class(x) == 'ts')
        return(ts(b, start = start(x), end = end(x)))
    else return(b)
}

binarys <- function(i, align = F){
    # returns the binary representation of integer vector i (coerced)
    # as a character vector
    #
    # Ex.
    #
    # binarys(1:10)
    ## [1] "1" "10" "11" "100" "101" "110" "111" "1000" "1001" "1010"
    # binarys(1:10,align=T)
    ## [1] "0001" "0010" "0011" "0100" "0101" "0110" "0111" "1000" "1001" "1010"
    #
    bb <- function(i) if (i) paste(bb(i %/% 2), i %% 2, sep = "") else ""
    # number of binary digits of largest number
    width <- ceiling(log(max(i)+1, base = 2))
    # get the character vector
    i <- sapply(as.integer(i), bb)
    if(align)
        return(sprintf(paste('%0', width, 'd', sep = ''), as.integer(i)))
    else return(i)
}

rows.equal <- function(x, y){
    if(is.null(dim(x))) x <- rbind(x)
    which(as.logical(apply(x, 1, all.equal, y)))
}
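## Ex. (tercile states labeled 0, 1, 2 -- e.g. dry/normal/wet; made-up data):
##
##   set.seed(1)
##   ntile.ts(ts(rnorm(30)), 3)
##   binary.ts(ts(rnorm(30)))   # two states split about the median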
trprob <- function(lag, lead, ns = max(lead)+1){
    require(gtools)
    require(Hmisc)   # for find.matches()
    q <- ncol(lag)
    r <- ncol(lead)
    states.lag <- permutations(ns, q, 0:(ns-1), repeats.allowed = T)
    states.lead <- permutations(ns, r, 0:(ns-1), repeats.allowed = T)
    nfrom <- nrow(states.lag)
    nto <- nrow(states.lead)
    nc <- ns^(q+r)
    tpm <- list(
        from = matrix(NA, nrow = nc, ncol = q),
        to = matrix(NA, nrow = nc, ncol = r),
        p = matrix(NA, nrow = nfrom, ncol = nto),
        cor = matrix(NA, nrow = nfrom, ncol = nto),
        pts = matrix(NA, nrow = nfrom, ncol = nto),
        sig = matrix(NA, nrow = nfrom, ncol = nto),
        pval = matrix(NA, nrow = nfrom, ncol = nto),
        gcv = matrix(NA, nrow = nfrom, ncol = nto),
        pools = list()
    )
    pb <- txtProgressBar(1, nrow(states.lag), style = 3)
    n <- 0
    tpm$pools$to <- tpm$pools$from <- list()
    rownames(tpm$p) <- character(nfrom)
    colnames(tpm$p) <- character(nto)
    for(i in 1:nfrom){
        setTxtProgressBar(pb, i)
        # all rows starting in a particular state
        pool <- rows.equal(lag, states.lag[i,])
        tpm$pools$from[[i]] <- pool
        tpm$pools$to[[i]] <- list()
        attr(tpm$pools$from[[i]], 'state.from') <- states.lag[i,]
        for(j in 1:nto){
            n <- n + 1
            # record the current to and from states
            tpm$from[n,] <- states.lag[i,]
            tpm$to[n,] <- states.lead[j,]
            #matches <- rows.equal(cbind(lead[pool,]), states.lead[j,])
            this.state <- rbind(states.lead[j,])
            # turn vectors (single state to) into matrices,
            # rbind handles the case of one entry only in the pool,
            # but multiple to states
            this.pool <- if(r == 1) cbind(lead[pool,]) else rbind(lead[pool,])
            # If we get this condition, something is wrong.
            if(ncol(this.state) != ncol(this.pool)) browser()
            # find the matching rows
            matches <- if(nrow(this.pool) == 0){
                # then we're empty
                integer(0)
            }else{
                find.matches(this.state, this.pool, maxmatch = nrow(lag))$matches
            }
            # find.matches returns a single 0 if there are no matches
            if(length(matches) == 1)
                if(matches == 0) matches <- integer(0)
            # record the transition probability
            possible <- nrow(this.pool)
            tpm$p[i,j] <- length(matches) / possible
            # set the transition state names; this sets them more often
            # than needed, but it doesn't hurt anything
            rownames(tpm$p)[i] <- paste(tpm$from[n,], collapse = '')
            colnames(tpm$p)[j] <- paste(tpm$to[n,], collapse = '')
            # save the indexes of the states from and to
            tpm$pools$to[[i]][[j]] <- as.vector(matches)
            attr(tpm$pools$to[[i]], 'state.from') <- states.lag[i,]
            attr(tpm$pools$to[[i]][[j]], 'state.to') <- states.lead[j,]
            #browser()
        }
    }
    close(pb)
    return(tpm)
}

binary.combos <- function(n){
    as.matrix(expand.grid(rep(list(c(T,F)), n)))
}

sim.pqr <- function(tr.paleo, conditional.pool = FALSE){
    ## NOTE: unfinished draft -- this.p, which.from, q and r are expected
    ## to exist in the calling environment.
    if(conditional.pool){
        # simulate transition
        rand <- runif(1)
        which.to <- rank(c(rand, cumsum(this.p)))[1]
        state.to <- tr.paleo$to[which.to,]
        # conditional pool, quantiles given state.to
        pool.from <- tr.paleo$pools$from[[ which.from[which.to] ]]
        pool.to <- tr.paleo$pools$to[[ which.from[which.to] ]]
        pool.from <- matrix(pool.from, , q)
        pool.to <- matrix(pool.to, , r)
    }
}

mylag <- function(x, lag, docor = FALSE){
    if(lag > length(x))
        warning("Lag is larger than input vector length, returning NA's")
    if(lag < 0) lagn = c(rep(NA, abs(lag)), x[-(length(x):(length(x)+lag+1))])
    if(lag == 0) lagn = x
    if(lag > 0) lagn = c(x[-(1:lag)], rep(NA, lag))
    remove = !is.na(lagn) & !is.na(x)
    if(docor){
        return(cor(x[remove], lagn[remove]))
    }else{
        return(lagn)
    }
}

searchCriteria <- function(x, lag.max.ami = 200, lag.max.acf = 400, d.max = 7, show = T){
    require(tseriesChaos)
    acf <- acf(x, lag.max = lag.max.acf, plot = F)
    acf <- acf$acf[,,1]
    ami <- mutual(x, lag.max = lag.max.ami, plot = F)
    T.acf.picked <- T.ami.picked <- FALSE
    for(i in 1:max(c(lag.max.acf, lag.max.ami))){
        if(i == max(c(lag.max.acf, lag.max.ami)))
            stop('One of the T criteria is monotonically decreasing, pick a larger lag.max.')
        if(!T.acf.picked){
            if(acf[i+1] > acf[i]){
                T.acf <- i
                T.acf.picked <- TRUE
            }
        }
        if(!T.ami.picked){
            if(ami[i+1] > ami[i]){
                T.ami <- i
                T.ami.picked <- TRUE
            }
        }
        if(T.acf.picked & T.ami.picked) break
    }
    cat('The lag suggested by the ACF criteria is:', T.acf, '\n')
    cat('The lag suggested by the AMI criteria is:', T.ami, '\n')
    fnn.acf <- false.nearest(x, m = d.max, d = T.acf, t = length(x)/10, eps = sd(x))
    fnn.ami <- false.nearest(x, m = d.max, d = T.ami, t = length(x)/10, eps = sd(x))
    d.acf <- order(abs(fnn.acf[1, 1:(d.max-1)] - fnn.acf[1, 2:d.max]))[1]
    d.ami <- order(abs(fnn.ami[1, 1:(d.max-1)] - fnn.ami[1, 2:d.max]))[1]
    cat('The FNN embedding dimension suggested by the ACF lag is:', d.acf, '\n')
    cat('The FNN embedding dimension suggested by the AMI lag is:', d.ami, '\n')
    # calculate correlation integrals
    cint <- d2(x, m = d.max, d = T.ami, t = .1, eps.min = 1)
    invisible(list(T.acf = T.acf, T.ami = T.ami, d.acf = d.acf, d.ami = d.ami, cint = cint))
}

plot.d2.cam <- function (x, ...){
    m <- ncol(x)
    plot(x[, c(1, m)], type = "l", log = "xy", ...)
    for (i in (m - 1):2)
        lines(x[, c(1, i)])
}

myscale <- function(x){
    (x - mean(x))/sd(x)
}
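## Ex.
##
##   mylag(1:5, 1)    # shift forward: 2 3 4 5 NA
##   mylag(1:5, -1)   # shift back:   NA 1 2 3 4
##   mylag(rnorm(50), 1, docor = TRUE)   # lag-1 autocorrelation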
build.pools.from <- function(tr, qr){
    # Builds from-pools in terms of actual values.
    # tr is an object from trprob()
    # qr is an object from laglead()
    nfrom <- nrow(unique(tr$from))
    pools.from <- vector('list', nfrom)
    for(i in 1:nfrom)
        pools.from[[i]] <- cbind(qr$lag[ tr$pools$from[[i]], ])
    return(pools.from)
}

build.pools.to <- function(tr, qr){
    # Builds to-pools in terms of actual values.
    # tr is an object from trprob()
    # qr is an object from laglead()
    nfrom <- nrow(unique(tr$from))
    nto <- nrow(unique(tr$to))
    q <- ncol(tr$from)
    r <- ncol(tr$to)
    pools.to <- vector('list', nto)
    for(i in 1:nfrom){
        pools.to[[i]] <- list()
        for(j in 1:nto){
            #ito <- (i-1)*nto+j
            #browser()
            # filter first by the from state then the to state
            pools.to[[i]][[j]] <- if(r == 1)
                cbind(cbind(qr$lead[ tr$pools$from[[i]], ])[tr$pools$to[[i]][[j]], ])
            else
                rbind(rbind(qr$lead[ tr$pools$from[[i]], ])[tr$pools$to[[i]][[j]], ])
        }
    }
    return(pools.to)
}

RPSS <- function(histobs, ensemble, altobs = NULL, ncat = 3){
    # This function calculates the Rank Probability Skill Score (RPSS)
    # for each prediction year; 'wet' is in the upper 67th percentile,
    # 'dry' is in the lower 33rd percentile
    nyr <- length(histobs)   # number of years of record
    rpss <- 1:nyr            # one score per year
    for(j in 1:nyr) {
        obsval <- histobs[j]   # observed value of that particular year
        # remove the year we are on to get calibration data
        calibdata <- histobs[-j]
        # Find the 33rd and 67th percentiles of the calibration data
        breaks <- seq(0, 1, 1/ncat)[-c(1, ncat+1)]
        quants <- if(!is.null(altobs)) quantile(altobs, breaks)
                  else quantile(calibdata, breaks)
        lq <- quants[1]
        uq <- quants[2]
        probs <- pclim <- obscat <- numeric(ncat)
        # assigns which category each of the simulations
        # falls in for the whole ensemble
        if(typeof(ensemble) == 'list'){
            for(i in 1:ncat){
                fun <- if(is.null(ncol(ensemble[[j]]$y)) || ncol(ensemble[[j]]$y) == 1){
                    approxfun(x = ensemble[[j]]$x, y = ensemble[[j]]$y)
                }else{
                    approxfun(x = ensemble[[j]]$x, y = ensemble[[j]]$y[,1])
                }
                maxv <- max(ensemble[[j]]$x)
                minv <- min(ensemble[[j]]$x)
                lower <- ifelse(i == 1, max(minv, 0), max(minv, quants[i-1]))
                upper <- ifelse(i == ncat, maxv, min(maxv, quants[i]))
                #print(j)
                #browser()
                probs[i] <- if(lower > upper) 0
                            else integrate(fun, lower, upper, subdivisions = 1000)$value
            }
        }else{
            probs <- table(ntile.ts(ensemble[,j], ncat, altobs = altobs))/length(ensemble[,j])
        }
        probs <- cumsum(probs)
        # climatology: equal chance of falling in each of the ncat categories
        pclim <- cumsum(rep(1, ncat)/ncat)
        # where the observation actually falls
        obscat <- rep(0, ncat)
        obscat[rank(c(obsval, quants))[1]] <- 1
        obscat <- cumsum(obscat)
        rpss[j] = 1 - (sum((obscat - probs)^2))/(sum((obscat - pclim)^2))
    }
    return(rpss)
}
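## For each year the score compares cumulative forecast probabilities F
## with the cumulative 0/1 observation vector O over the ncat categories:
##   RPS = sum((F - O)^2),  RPSS = 1 - RPS / RPS_climatology.
##
## Ex. (a minimal sketch with a 100-member ensemble; the data are made up,
## so the mean skill should be near zero):
##
##   set.seed(1)
##   histobs <- rnorm(50)
##   ens <- matrix(rnorm(100*50), 100, 50)   # columns are years
##   mean(RPSS(histobs, ens))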
rpss_single <- function(ensemble, actual, histobs, ncat = 3){
    # This function calculates the Rank Probability Skill Score (RPSS)
    # for a single forecast ensemble against the actual observation;
    # 'wet' is in the upper 67th percentile, 'dry' in the lower 33rd percentile
    # Find the 33rd and 67th percentiles of the calibration data
    breaks <- seq(0, 1, 1/ncat)[-c(1, ncat+1)]
    quants <- quantile(histobs, breaks)
    probs <- pclim <- obscat <- numeric(ncat)
    probs <- table(factor(ntile.ts(ensemble, ncat, altobs = histobs),
                          levels = 0:(ncat-1)))/length(ensemble)
    probs <- cumsum(probs)
    # climatology: equal chance of falling in each of the ncat categories
    pclim <- cumsum(rep(1, ncat)/ncat)
    # where the observation actually falls
    obscat <- rep(0, ncat)
    obscat[rank(c(actual, quants))[1]] <- 1
    obscat <- cumsum(obscat)
    rpss = 1 - (sum((obscat - probs)^2))/(sum((obscat - pclim)^2))
    return(rpss)
}

oneoverk <- function(k){
    # decreasing kernel weights W[j] = (1/j) / sum(1/j), j = 1..k
    W <- numeric(k)
    for(j in 1:k) W[j] <- 1/j
    W <- W/sum(W)
    W
}

############################################################
############################################################
# sigcor:
# Returns the minimum significant correlation at the
# given alpha value
sigcor <- function(n, alpha = .05){
    k <- qnorm(alpha)^2/(n-2)
    sqrt(k/(k+1))
}

# returns the best alpha and degree
best.par <- function(x, y, a = seq(0.2, 1.0, by = 0.05), n = length(a),
        f = c(gcvplot, aicplot), ...){
    require(locfit)
    options(warn = -1)
    # get the gcv values for all combinations of deg and alpha
    d1 <- try(f(y~x, deg = 1, alpha = a, kern = 'bisq', scale = T, ...)$values, silent = T)
    if(class(d1) == "try-error") d1 <- numeric()
    d2 <- try(f(y~x, deg = 2, alpha = a, kern = 'bisq', scale = T, ...)$values, silent = T)
    if(class(d2) == "try-error") d2 <- numeric()
    options(warn = 0)
    gcvs <- c(d1, d2)
    if(length(gcvs) == 0){
        bestalpha <- bestd <- 1
        gcv <- NA
    }else{
        best <- order(gcvs)[1]
        # get the best alpha and degree
        bestd <- c(rep(1, n), rep(2, n))[best]
        bestalpha <- c(a, a)[best]
        gcv <- gcvs[best]
    }
    return(list(p = bestd, a = bestalpha, gcv = gcv))
}

plot.sy.sims <- function(sims, hist, q, r, fn = NULL){
    if(!is.null(fn)) pdf(fn, width = 10, height = 4)
    hist.plot <- list()
    layout(cbind(1:r))
    for(s in 1:r){
        # the first year simulated is the start year + q - 1 + s
        # (the number of years forecasted ahead); for example,
        # if q = 3, we need three years back to initialize, so if the first
        # year in the record is 1906 then the first year we can forecast is
        # 1906 + 3 - 1 + 1 = 1909
        colnames(sims[[s]]) <- (start(hist)[1] + q - 1):(end(hist)[1]) + s
        hist.plot[[s]] <- window(hist, start(hist)[1] + q + s - 1)
        boxplot(sims[[s]], outline = F)   #[,2:ncol(sims[[s]])]
        lines(1:length(hist.plot[[s]]), hist.plot[[s]], col = 'red', lwd = 2,
              type = 'b', cex = .5)
    }
    if(!is.null(fn)) dev.off()
    invisible(hist.plot)
}

rank.histogram <- function(sim, hist){
    ranks <- integer(length(hist))
    for(i in 1:length(hist)){
        ranks[i] <- rank(c(hist[i], sim[,i]), ties.method = 'first')[1]
    }
    hist(ranks)
    invisible(ranks)
}

# Read a monthly data file with year in the first column and months along
# the rows; create a yearly time series aggregated April <--> March, the
# "runoff year"
make.runoff.year <- function(file = 'data/LeesFerry.txt', start = c(1906,1)){
    x <- ts(as.vector(t(as.matrix(read.table(file)[,-1]))),
            start = start, frequency = 12)
    y <- numeric(0)
    chunk <- 4:15
    for(i in 1:(length(unique(floor(time(x)))) - 1)){
        y[i] <- sum(x[chunk])
        chunk <- chunk + 12
    }
    y <- ts(y, start = start, frequency = 1)
    y
}

# modified from code in Zucchini and McDonald (2009) to work with the
# HiddenMarkov package to do state prediction
hmm.state.prediction <- function(x, H = 1, ...)
{
    require(HiddenMarkov)
    n <- length(x$x)
    m <- ncol(x$Pi)
    fb <- forwardback(x$x, x$Pi, x$delta, x$distn, x$pm)
    la <- t(fb$logalpha)
    c <- max(la[, n])
    llk <- c + log(sum(exp(la[, n] - c)))
    statepreds <- matrix(NA, ncol = H, nrow = m)
    foo1 <- exp(la[, n] - llk)
    foo2 <- diag(m)
    for (i in 1:H) {
        foo2 <- foo2 %*% x$Pi
        statepreds[, i] <- foo1 %*% foo2
    }
    statepreds
}
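## Ex. (state probabilities H steps ahead; a minimal sketch assuming a
## 2-state normal dthmm fit from the HiddenMarkov package -- Pi_init()
## and delta_init() are defined later in this file):
##
##   require(HiddenMarkov)
##   set.seed(1)
##   y <- c(rnorm(50, -1), rnorm(50, 1))
##   mod <- dthmm(y, Pi_init(2), delta_init(2), 'norm',
##                list(mean = c(-1, 1), sd = c(1, 1)))
##   mod <- BaumWelch(mod)
##   hmm.state.prediction(mod, H = 3)   # m x H matrix of probabilities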
# generate forecast distributions for H steps out for a given hmm
# model x; the component densities are evaluated with the d<distn>
# family functions
hmm.forecast.dist <- function(x, H = 1, len = 300, ...)
{
    #lower <- qnorm(0.001, min(x$pm$mean), x$pm$sd[which.min(x$pm$mean)])
    #upper <- qnorm(0.999, max(x$pm$mean), x$pm$sd[which.max(x$pm$mean)])
    range <- extendrange(x$x, f = .4)
    if(range[1] < 0) range[1] <- 0
    xrange <- seq(range[1], range[2], , len)
    n <- length(x$x)
    m <- ncol(x$Pi)
    # the probability density for each observation and each state
    dfun.hmm <- function(x, xrange = x$x){
        m <- ncol(x$Pi)
        allprobs <- matrix(NA, nrow = length(xrange), ncol = m)
        np <- length(x$pm)
        fun <- paste('d', x$distn, sep = '')
        for(i in 1:m){
            allprobs[,i] <- if(np == 1){
                eval(call(fun, xrange, x$pm[[1]][i]))
            }else if(np == 2){
                #browser()
                if(x$distn == 'gamma'){
                    eval(call(fun, xrange, x$pm[[2]][i], x$pm[[1]][i]))
                }else{
                    eval(call(fun, xrange, x$pm[[1]][i], x$pm[[2]][i]))
                }
            }else if(np == 3){
                eval(call(fun, xrange, x$pm[[1]][i], x$pm[[2]][i], x$pm[[3]][i]))
            }
        }
        allprobs
    }
    allprobs <- dfun.hmm(x)
    #allprobs <- outer(x$x, x$pm$mean, dnorm, x$pm$sd)
    allprobs <- ifelse(!is.na(allprobs), allprobs, 1)
    # do first calculation, alpha_T/(alpha_T * 1')
    # which uses the initial distribution
    foo <- x$delta * allprobs[1, ]
    sumfoo <- sum(foo)
    lscale <- log(sumfoo)
    foo <- foo/sumfoo
    # phi term from Zucchini (2009) equation 5.4
    for (i in 2:n) {
        foo <- foo %*% x$Pi * allprobs[i, ]
        sumfoo <- sum(foo)
        lscale <- lscale + log(sumfoo)
        foo <- foo/sumfoo
    }
    # forecast state probabilities, these are the epsilons in equation 5.5
    xi <- matrix(NA, nrow = m, ncol = H)
    for (i in 1:H) {
        foo <- foo %*% x$Pi
        xi[, i] <- foo
    }
    allprobs.out <- dfun.hmm(x, xrange)
    # forecast densities: the component densities weighted by the state
    # probabilities at each horizon
    fdists <- allprobs.out %*% xi[, 1:H]
    list(x = xrange, y = fdists)
}

# boxplot the forecast distributions based on their summary stats
hmm.boxplot.fc <- function(hmm.fc, sy, H = 1, ...){
    bx <- boxplot(matrix(0, nrow = 1, ncol = length(hmm.fc)), plot = FALSE)
    for(i in 1:length(hmm.fc)){
        fc <- if(is.null(ncol(hmm.fc[[i]]$y)) || ncol(hmm.fc[[i]]$y) == 1){
            hmm.fc[[i]]
        }else{
            list(x = hmm.fc[[i]]$x, y = hmm.fc[[i]]$y[,H])
        }
        bx$stats[,i] <- hmm.fcdist.fivenum(fc)
    }
    bx$names <- paste(sy:(sy+length(hmm.fc)-1))
    bxp(bx, ...)
}

# takes a list of the forecast distribution for each year and
# generates ensembles from them by sampling
hmm.ensemble.fc <- function(hmm.fc, sy, nsims = 1000, ...){
    years <- sy:(sy+length(hmm.fc)-1)
    H <- ncol(hmm.fc[[1]]$y)
    ens <- if(H == 1) array(NA, c(nsims, length(hmm.fc)))
           else array(NA, c(nsims, length(hmm.fc), H))
    pb <- txtProgressBar(1, length(hmm.fc), style = 3)
    for(i in 1:length(hmm.fc)){
        setTxtProgressBar(pb, i)
        if(H == 1)
            ens[,i] <- hmm.fcdist.sim(hmm.fc[[i]], nsims)
        else
            for(j in 1:H)
                ens[,i,j] <- hmm.fcdist.sim(list(x = hmm.fc[[i]]$x,
                                                 y = cbind(hmm.fc[[i]]$y[,j])), nsims)
    }
    close(pb)
    names(ens) <- paste(years)
    ens
}

# returns the 5th, 25th, 50th, 75th and 95th percentiles
# of a given forecast distribution
hmm.fcdist.fivenum <- function(fcdist){
    pts <- c(.05, .25, .5, .75, .95)
    fivenum <- numeric(5)
    for(i in 1:5){
        cdf <- cumsum(fcdist$y)
        cdf <- cdf/max(cdf)
        fivenum[i] <- approx(cdf, fcdist$x, pts[i])$y
    }
    fivenum
}

# simulate nsim values from a forecast distribution
hmm.fcdist.sim <- function(fcdist, nsim = 1000){
    pts <- runif(nsim)
    cdf <- cumsum(fcdist$y[,1])
    cdf <- cdf/max(cdf)
    sim <- numeric(nsim)
    for(i in 1:nsim){
        sim[i] <- approx(cdf, fcdist$x, pts[i])$y
        while(is.na(sim[i]))
            sim[i] <- approx(cdf, fcdist$x, runif(1))$y
    }
    sim
}

# plot transition probabilities over time for the 2 state models only
# hmm is a list of fitted models
# sy is the start year for plotting
plot.hmm.trprob.2state <- function(hmm, sy){
    #ww <- wd <- dw <- dd <- numeric(length(hmm))
    require(ggplot2)
    require(reshape2)   # for melt()
    theme_set(theme_bw())
    myopts <- opts(
        panel.grid.major = theme_blank(),
        panel.grid.minor = theme_blank(),
        axis.text.x = theme_text(size = 11),
        axis.text.y = theme_text(size = 11, angle = 90),
        axis.title.x = theme_text(size = 11),
        axis.title.y = theme_text(size = 11, angle = 90),
        panel.background = theme_rect())
    tr <- matrix(NA, ncol = 4, nrow = length(hmm))
    for(i in 1:length(hmm)){
        tr[i,1] <- hmm[[i]]$Pi[1,1]
        tr[i,2] <- hmm[[i]]$Pi[1,2]
        tr[i,3] <- hmm[[i]]$Pi[2,1]
        tr[i,4] <- hmm[[i]]$Pi[2,2]
    }
    rownames(tr) <- sy:(sy+length(hmm)-1)
    colnames(tr) <- c("ww", "wd", "dw", "dd")
    tr <- melt(tr)
    names(tr) <- c('Time', 'Transition', 'Probability')
    qplot(Time, Probability, data = tr, geom = "line",
          col = Transition, linetype = Transition) +
        myopts + opts(legend.position = c(0.2, 0.5)) +
        ylab('Hidden State Transition Probability') + xlab('Time')
}

get.named.parlist <- function(x, m, dist, ic, ...){
    require(MASS)
    fit <- fitdistr(x, dist, ...)
    np <- length(fit$estimate)
    pars <- vector('list', np)
    names(pars) <- names(fit$estimate)
    for(j in 1:m){
        this.fit <- fitdistr(x[ntile.ts(x,m) == (j-1)], dist, ...)
        #for(k in 1:np)
        #    pars[[k]][j] <- this.fit$estimate[k]
        for(k in 1:np)
            pars[[k]][j] <- fit$estimate[k]
        # for the normal, estimates are (mean, sd) in that order
        if(dist == 'normal'){
            if(ic == 'same.both'){
                pars[[1]][j] <- mean(x)
                pars[[2]][j] <- sd(x)
            } else if(ic == 'same.sd'){
                pars[[1]][j] <- mean(x[ntile.ts(x,m) == (j-1)])
                pars[[2]][j] <- sd(x)
            }else{
                pars[[1]][j] <- mean(x[ntile.ts(x,m) == (j-1)])
                pars[[2]][j] <- sd(x[ntile.ts(x,m) == (j-1)])
            }
        }
    }
    pars
}
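## Ex. (a forecast "distribution" here is just a list with support x and
## density y, as returned by hmm.forecast.dist; the values are made up):
##
##   xs <- seq(0, 10, length.out = 200)
##   fc <- list(x = xs, y = cbind(dnorm(xs, mean = 5)))
##   hmm.fcdist.fivenum(fc)             # 5th/25th/50th/75th/95th percentiles
##   sims <- hmm.fcdist.sim(fc, 1000)   # draws by inverting the empirical CDF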
plot.components <- function(x, m, dist, pars = NULL, ...){
    fd.name <- ifelse(dist == "norm", "normal", dist)
    pars <- if(is.null(pars)) get.named.parlist(x, m, fd.name, ...) else pars
    hist(x, freq = F)
    r <- extendrange(x, f = .1)
    r <- seq(r[1], r[2], , 1000)
    for(i in 1:m){
        dens <- eval(call(paste('d', dist, sep = ''), r, pars[[2]][i], pars[[1]][i]))
        lines(r, 1/m*dens)
    }
}

fit.fourier <- function(u, curves = NULL, plot = F, ...){
    w <- length(u)
    # maximum number of harmonics
    h <- if(w%%2 == 1) (w-1)/2 else w/2
    t <- 1:w
    pgram <- ranked.harm <- integer(h)
    A <- B <- MSD <- numeric(h)
    ubar <- mean(u)
    # Estimate fourier coefficients
    for(j in 1:h){
        A[j] <- 2/w * sum(u * cos(2 * pi * j * t/w))
        B[j] <- 2/w * sum(u * sin(2 * pi * j * t/w))
    }
    MSDu <- 1/w * sum((u - ubar)^2)
    MSD <- 1/2 * (A^2 + B^2)
    ranked.harm <- order(MSD, decreasing = T)
    pgram <- cumsum(MSD[ranked.harm]) / MSDu
    f.curves <- list()
    if(!is.null(curves)){
        for(i in curves){
            f.curves[[i]] = numeric(w)
            for(j in ranked.harm[1:i])
                f.curves[[i]] = f.curves[[i]] + A[j]*cos(2*pi*j*t/w) + B[j]*sin(2*pi*j*t/w)
            f.curves[[i]] = f.curves[[i]] + ubar
        }
    }
    if(plot)
        plot(0:length(pgram), c(0, pgram), xlab = "Ranked Harmonic",
             ylab = "Percent of Explained Variation",
             main = "Cumulative Periodogram", type = 'l', col = "steelblue", ...)
    return(list(u = u, w = w, h = h, A = A, B = B, ranked.harm = ranked.harm,
                MSD = MSD, MSDu = MSDu, pgram = pgram, curves = f.curves))
}

plot.harmonics <- function(ff, h = ff$h, ...){
    w <- ff$w
    A <- ff$A
    B <- ff$B
    u <- ff$u
    t <- seq(1, w, length.out = 10*ff$w)
    f.curve <- list(x = numeric(length(t)), y = numeric(length(t)))
    f.curve$x <- t
    for(j in ff$ranked.harm[1:h])
        f.curve$y = f.curve$y + A[j]*cos(2*pi*j*t/w) + B[j]*sin(2*pi*j*t/w)
    f.curve$y = f.curve$y + mean(u)
    plot(f.curve, type = 'l', ...)
    invisible(f.curve)
}

fit.harmonic.ar <- function(ff, h = ff$h, res = 1){
    require(forecast)
    w <- ff$w
    A <- ff$A
    B <- ff$B
    u <- ff$u
    if(w%%2 == 1){
        if(h > (w-1)/2) h <- (w-1)/2
    }else{
        if(h > w/2) h <- w/2
    }
    t <- seq(1, w, length.out = res*ff$w)
    harmonic <- ar.fit <- ar.fc <- vector('list', h)
    for(k in 1:length(ff$ranked.harm[1:h])){
        j <- ff$ranked.harm[k]
        harmonic[[k]] = A[j]*cos(2*pi*j*t/w) + B[j]*sin(2*pi*j*t/w)
        #print(k)
        #if(k==23)browser()
        ar.fit[[k]] <- ar(harmonic[[k]])   #,method='mle')
        ar.fc[[k]] <- predict(ar.fit[[k]], n.ahead = res)
    }
    ar.pred <- sapply(ar.fc, "[[", 'pred')
    ar.se <- sapply(ar.fc, "[[", 'se')
    ar.upper <- qnorm(.95, sd = ar.se, mean = ar.pred)
    #ar.sd <- qnorm(.683, sd=ar.se, mean=ar.pred)
    ar.lower <- qnorm(.05, sd = ar.se, mean = ar.pred)
    #browser()
    return(list(mean = sum(ar.pred) + mean(u),
                lower = sum(ar.lower) + mean(u),
                sd = sum(ar.se),
                upper = sum(ar.upper) + mean(u)))
}

# fit.harmonic.mvhmm: currently identical to fit.harmonic.ar
fit.harmonic.mvhmm <- fit.harmonic.ar

#WAVELET  1D Wavelet transform with optional significance testing
#
#   [WAVE,PERIOD,SCALE,COI] = wavelet(Y,DT,PAD,DJ,S0,J1,MOTHER,PARAM)
#
#   Computes the wavelet transform of the vector Y (length N),
#   with sampling rate DT.
#
#   By default, the Morlet wavelet (k0=6) is used.
#   The wavelet basis is normalized to have total energy=1 at all scales.
#
#
# INPUTS:
#
#    Y = the time series of length N.
#    DT = amount of time between each Y value, i.e. the sampling time.
#
# OUTPUTS:
#
#    WAVE is the WAVELET transform of Y. This is a complex array
#    of dimensions (N,J1+1). FLOAT(WAVE) gives the WAVELET amplitude,
#    ATAN(IMAGINARY(WAVE),FLOAT(WAVE)) gives the WAVELET phase.
#    The WAVELET power spectrum is ABS(WAVE)^2.
#    Its units are sigma^2 (the time series variance).
#
#
# OPTIONAL INPUTS:
#
# *** Note *** setting any of the following to -1 will cause the default
#              value to be used.
#
#    PAD = if set to 1 (default is 0), pad time series with enough zeroes to get
#          N up to the next higher power of 2. This prevents wraparound
#          from the end of the time series to the beginning, and also
#          speeds up the FFT's used to do the wavelet transform.
#          This will not eliminate all edge effects (see COI below).
#
#    DJ = the spacing between discrete scales. Default is 0.25.
#         A smaller # will give better scale resolution, but be slower to plot.
#
#    S0 = the smallest scale of the wavelet. Default is 2*DT.
#
#    J1 = the # of scales minus one. Scales range from S0 up to S0*2^(J1*DJ),
#         to give a total of (J1+1) scales. Default is J1 = (LOG2(N DT/S0))/DJ.
#
#    MOTHER = the mother wavelet function.
#             The choices are 'MORLET', 'PAUL', or 'DOG'
#
#    PARAM = the mother wavelet parameter.
#            For 'MORLET' this is k0 (wavenumber), default is 6.
#            For 'PAUL' this is m (order), default is 4.
#            For 'DOG' this is m (m-th derivative), default is 2.
#
#
# OPTIONAL OUTPUTS:
#
#    PERIOD = the vector of "Fourier" periods (in time units) that corresponds
#             to the SCALEs.
#
#    SCALE = the vector of scale indices, given by S0*2^(j*DJ), j=0...J1
#            where J1+1 is the total # of scales.
#
#    COI = if specified, then return the Cone-of-Influence, which is a vector
#          of N points that contains the maximum period of useful information
#          at that particular time.
#          Periods greater than this are subject to edge effects.
#          This can be used to plot COI lines on a contour plot by doing:
#
#              contour(time,log(period),log(power))
#              plot(time,log(coi),'k')
#
#----------------------------------------------------------------------------
#   Copyright (C) 1995-2004, Christopher Torrence and Gilbert P. Compo
#
#   This software may be used, copied, or redistributed as long as it is not
#   sold and this copyright notice is reproduced on each copy made. This
#   routine is provided as is without any express or implied warranties
#   whatsoever.
#
# Notice: Please acknowledge the use of the above software in any publications:
#    ``Wavelet software was provided by C. Torrence and G. Compo,
#      and is available at URL: http://paos.colorado.edu/research/wavelets/''.
#
# Reference: Torrence, C. and G. P. Compo, 1998: A Practical Guide to
#            Wavelet Analysis. Bull. Amer. Meteor. Soc., 79, 61-78.
#
# Please send a copy of such publications to either C. Torrence or G. Compo:
#  Dr. Christopher Torrence               Dr. Gilbert P. Compo
#  Research Systems, Inc.                 Climate Diagnostics Center
#  4990 Pearl East Circle                 325 Broadway R/CDC1
#  Boulder, CO 80301, USA                 Boulder, CO 80305-3328, USA
#  E-mail: chris[AT]rsinc[DOT]com         E-mail: compo[AT]colorado[DOT]edu
#----------------------------------------------------------------------------
wavelet = function(Y, DT = 1, pad = 0, dj = 0.25, param = 6){
    # Y is the time series to be analyzed
    # DT is the timestep; for annual data, 1
    # pad data? 0=F, 1=T
    # dj = spacing between discrete scales (.25)
    # param = wavenumber (6)
    s0 = 2*DT
    #s0=DT
    n1 = length(Y)
    J1 = floor((log2(n1*DT/s0))/dj)
    #....construct time series to analyze, pad if necessary
    x = Y - mean(Y)
    if (pad == 1){
        base2 = trunc(log(n1)/log(2) + 0.4999)   # power of 2 nearest to N
        x = c(x, rep(0, 2^(base2 + 1) - n1))
    }
    n = length(x)
    #....construct wavenumber array used in transform [Eqn(5)]
    k = (1:trunc(n/2))
    k = k*((2*pi)/(n*DT))
    k = c(0, k, -rev(k[1:floor((n-1)/2)]))
    #....compute FFT of the (padded) time series
    f = fft(x)   # [Eqn(3)]
    #....construct SCALE array & empty PERIOD & WAVE arrays
    scale = s0*2^((0:J1)*dj)
    period = scale
    wave = matrix(data = 0, ncol = n, nrow = J1+1)   # define the wavelet array
    wave = as.complex(wave)                          # make it complex
    wave = matrix(data = wave, ncol = n, nrow = J1+1)
    # loop through all scales and compute transform
    # (the Morlet daughter-wavelet code below was inlined from wave_bases_r.txt)
    for(a1 in 1:(J1+1)){
        scl = scale[a1]
        nn = length(k)
        k0 = param
        expnt = -(scl*k - k0)^2/(2*(k > 0))
        norm = sqrt(scl*k[2])*(pi^(-0.25))*sqrt(nn)    # total energy=N [Eqn(7)]
        daughter = norm*exp(expnt)
        daughter = daughter*(k > 0)                    # Heaviside step function
        fourier_factor = (4*pi)/(k0 + sqrt(2 + k0^2))  # Scale-->Fourier [Sec.3h]
        coi = fourier_factor/sqrt(2)                   # Cone-of-influence [Sec.3g]
        dofmin = 2                                     # Degrees of freedom
        # wavelet transform [Eqn(4)]
        wave[a1,] = fft((f*daughter), inverse = TRUE)/(length(f*daughter))
    }
    period = fourier_factor*scale
    coi = coi*c(1:(floor(n1 + 1)/2), rev(1:floor(n1/2))) * DT
    wave = wave[, 1:n1]   # get rid of padding before returning
    power = abs(wave)^2
    list(wave = wave, period = period, scale = scale, power = power, coi = coi)
    # end of code
}

rl_states <- function(x){
    rl <- rle(x)$lengths
    df <- as.data.frame(table(rl))
    count <- df$Freq
    lens <- as.numeric(levels(df$rl))
    return(list(rl = rl, len = lens, count = count))
}

rl.count <- function(x, ns){
    rl <- rle(as.vector(ntile.ts(x, ns)))$lengths
    df <- as.data.frame(table(rl))
    count <- df$Freq
    lens <- as.numeric(levels(df$rl))
    return(list(rl = rl, len = lens, count = count))
}

rl.count.lower <- function(x, ns){
    ql <- quantile(x, 1/ns)
    x[x > ql] <- NA
    x[!is.na(x)] <- 1
    rl <- rle(as.vector(x))
    df <- as.data.frame(table(rl$lengths[!is.na(rl$values)]))
    count <- df$Freq
    lens <- as.numeric(levels(df[,1]))
    return(list(rl = rl$lengths, len = lens, count = count))
}

rl.count.upper <- function(x, ns){
    qu <- quantile(x, 1-1/ns)
    x[x < qu] <- NA
    x[!is.na(x)] <- 1
    rl <- rle(as.vector(x))
    df <- as.data.frame(table(rl$lengths[!is.na(rl$values)]))
    count <- df$Freq
    lens <- as.numeric(levels(df[,1]))
    return(list(rl = rl$lengths, len = lens, count = count))
}

# NOTE: masks grDevices::boxplot.stats when this file is sourced
boxplot.stats <- function(sims, x){
    layout(rbind(1:5))
    boxplot(apply(sims, 1, mean))
    points(mean(x), col = 'Blue', pch = 2)
    mtext("Mean", 1, 1)
    boxplot(apply(sims, 1, sd))
    points(sd(x), col = 'Blue', pch = 2)
    mtext('SD', 1, 1)
    boxplot(apply(sims, 1, min))
    points(min(x), col = 'Blue', pch = 2)
    mtext('Min', 1, 1)
    boxplot(apply(sims, 1, max))
    points(max(x), col = 'Blue', pch = 2)
    mtext('Max', 1, 1)
    boxplot(apply(sims, 1, mylag, 1, docor = T))
    points(mylag(x, 1, docor = T), col = 'Blue', pch = 2)
    mtext('Lag 1 AC', 1, 1)
}
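## Ex. (run lengths of tercile states; made-up data):
##
##   set.seed(1)
##   rl.count(rnorm(100), 3)         # $len / $count tabulate run lengths
##   rl.count.lower(rnorm(100), 3)   # runs below the lower tercile only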
boxplot.stats.list <- function(stats, x){
    par(mar = c(5,3,3,.5)+.1)
    layout(rbind(1:7))
    boxplot(stats$mean)
    points(mean(x), col = 'Blue', pch = 2)
    mtext("Mean", 1, 1)
    boxplot(stats$sd)
    points(sd(x), col = 'Blue', pch = 2)
    mtext('SD', 1, 1)
    boxplot(stats$skew)
    points(skewness(x), col = 'Blue', pch = 2)
    mtext('Skew', 1, 1)
    boxplot(stats$min)
    points(min(x), col = 'Blue', pch = 2)
    mtext('Min', 1, 1)
    boxplot(stats$max)
    points(max(x), col = 'Blue', pch = 2)
    mtext('Max', 1, 1)
    boxplot(stats$lag1)
    points(mylag(x, 1, docor = T), col = 'Blue', pch = 2)
    mtext('Lag 1 AC', 1, 1)
    boxplot(stats$lag2)
    points(mylag(x, 2, docor = T), col = 'Blue', pch = 2)
    mtext('Lag 2 AC', 1, 1)
}

stats_dt_ggplot <- function(stats, model,
        which = c('mean','sd','skew','min','max','lag1'),
        labs = c('Mean','SD','Skew','Min','Max','Lag 1 AC')){
    require(data.table)
    require(reshape2)
    dt <- data.table(stats[[which[1]]])
    for(i in 2:length(which))
        dt <- cbind(dt, stats[[which[i]]])
    setnames(dt, which)
    dt$sim <- factor(1:nrow(dt))
    dt <- melt(dt)
    dt$model <- model
    dt$variable <- factor(dt$variable)
    as.data.table(dt)
}

obs_stats <- function(x, which = c('mean','sd','skewness','min','max','lag1'),
        labs = c('Mean','Standard Deviation','Skewness','Minimum','Maximum','Lag 1 AC')){
    require('e1071')
    require(data.table)
    dt <- data.table(variable = character(0), value = numeric(0))
    for(i in 1:length(which))
        dt <- rbind(dt, data.table(variable = labs[i],
                                   value = do.call(which[i], list(x))))
    dt$variable <- factor(dt$variable)
    dt
}

lag1 <- function(x) mylag(x, 1, docor = T)
skew <- function(x) skewness(x)

boxplot.stats3 <- function(sims1, sims2, sims3, x, ...){
    require(e1071)
    layout(rbind(1:3, 4:6))
    boxplot(cbind(apply(sims1,1,mean), apply(sims2,1,mean), apply(sims3,1,mean)), ...)
    abline(h = mean(x), col = 'Blue')
    mtext("Mean", 3, 1)
    boxplot(cbind(apply(sims1,1,sd), apply(sims2,1,sd), apply(sims3,1,sd)), ...)
    abline(h = sd(x), col = 'Blue')
    mtext('SD', 3, 1)
    boxplot(cbind(apply(sims1,1,skewness), apply(sims2,1,skewness),
                  apply(sims3,1,skewness)), ...)
    abline(h = skewness(x), col = 'Blue')
    mtext('Skew', 3, 1)
    boxplot(cbind(apply(sims1,1,min), apply(sims2,1,min), apply(sims3,1,min)), ...)
    abline(h = min(x), col = 'Blue')
    mtext('Min', 3, 1)
    boxplot(cbind(apply(sims1,1,max), apply(sims2,1,max), apply(sims3,1,max)), ...)
    abline(h = max(x), col = 'Blue')
    mtext('Max', 3, 1)
    boxplot(cbind(apply(sims1,1,mylag,1,docor=T), apply(sims2,1,mylag,1,docor=T),
                  apply(sims3,1,mylag,1,docor=T)), ...)
    abline(h = mylag(x,1,docor=T), col = 'Blue')
    mtext('Lag 1 AC', 3, 1)
}

boxplot.stats.n.list <- function(stats, x, ...){
    require(e1071)
    layout(rbind(1:3, 4:6))
    boxplot(sapply(stats, '[[', 'mean'), ...)
    abline(h = mean(x), col = 'Blue')
    mtext("Mean", 3, 1)
    boxplot(sapply(stats, '[[', 'sd'), ...)
    abline(h = sd(x), col = 'Blue')
    mtext('SD', 3, 1)
    boxplot(sapply(stats, '[[', 'skew'), ...)
    abline(h = skewness(x), col = 'Blue')
    mtext('Skew', 3, 1)
    boxplot(sapply(stats, '[[', 'min'), ...)
    abline(h = min(x), col = 'Blue')
    mtext('Min', 3, 1)
    boxplot(sapply(stats, '[[', 'max'), ...)
    abline(h = max(x), col = 'Blue')
    mtext('Max', 3, 1)
    boxplot(sapply(stats, '[[', 'lag1'), ...)
    abline(h = mylag(x,1,docor=T), col = 'Blue')
    mtext('Lag 1 AC', 3, 1)
}

plot.dthmm <- function(x, res = 1000, cols = NULL, ...){
    m <- length(x$delta)
    dens <- matrix(NA, nrow = m, ncol = res)
    r <- extendrange(x$x, f = .25)
    xrange <- seq(r[1], r[2], len = res)
    for(i in 1:m){
        if(x$distn == 'gamma')
            dens[i,] <- dgamma(xrange, shape = x$pm$shape[i], rate = x$pm$rate[i])
        else if(x$distn == 'norm')
            dens[i,] <- dnorm(xrange, mean = x$pm$mean[i], sd = x$pm$sd[i])
        else stop('Distribution not supported')
    }
    hist(x$x, freq = F, ylim = c(0, max(c(hist(x$x, plot = F)$density, max(dens)))), ...)
    for(i in 1:m)
        lines(xrange, dens[i,], col = ifelse(is.null(cols), i+1, cols[i]))
}

plot.dthmm.stat <- function(x, res = 1000, cols = NULL, ...){
    m <- length(x$delta)
    dens <- matrix(0, nrow = m+1, ncol = res)
    r <- extendrange(x$x, f = .25)
    xrange <- seq(r[1], r[2], len = res)
    delta <- statdist(x$Pi)
    for(i in 1:m){
        if(x$distn == 'gamma'){
            dens[i,] <- delta[i]*dgamma(xrange, shape = x$pm$shape[i], rate = x$pm$rate[i])
        }else if(x$distn == 'norm'){
            dens[i,] <- delta[i]*dnorm(xrange, mean = x$pm$mean[i], sd = x$pm$sd[i])
        }else{
            stop('Distribution not supported')
        }
        dens[m+1,] <- dens[m+1,] + dens[i,]
    }
    hist(x$x, freq = F, ...)   #ylim=c(0,max(c(hist(x$x,plot=F)$density,max(dens)))),...)
    for(i in 1:m)
        lines(xrange, dens[i,], col = ifelse(is.null(cols), i+1, cols[i]))
    lines(xrange, dens[m+1,], col = 1, lwd = 1.5)
}

ggplot_stationary_hmm <- function(x, res = 1000, cols = NULL, ...){
    require(ggplot2)
    require(data.table)
    require(ggthemes)   # for scale_color_tableau()
    m <- length(x$delta)
    dens <- matrix(0, nrow = m+1, ncol = res)
    r <- extendrange(x$x, f = .25)
    xrange <- seq(r[1], r[2], len = res)
    delta <- statdist(x$Pi)
    for(i in 1:m){
        if(x$distn == 'gamma'){
            dens[i,] <- delta[i]*dgamma(xrange, shape = x$pm$shape[i], rate = x$pm$rate[i])
        }else if(x$distn == 'norm'){
            dens[i,] <- delta[i]*dnorm(xrange, mean = x$pm$mean[i], sd = x$pm$sd[i])
        }else{
            stop('Distribution not supported')
        }
        dens[m+1,] <- dens[m+1,] + dens[i,]
    }
    p <- ggplot() +
        geom_histogram(data = data.frame(x = as.vector(x$x)),
                       aes(x = x, y = ..density..),
                       binwidth = diff(range(x$x))/8, fill = 'white', color = 'black') +
        theme_bw()
    dt <- data.table(x = numeric(0), y = numeric(0), state = integer(0))
    for(i in 1:m)
        dt <- rbind(dt, data.table(x = xrange, y = dens[i,], state = i))
    dt$state <- factor(dt$state)
    p <- p + geom_line(data = dt, aes(x = x, y = y, color = state)) +
        geom_line(data = data.frame(x = xrange, y = dens[m+1,]), aes(x = x, y = y),
                  color = 'black', size = 1) +
        scale_color_tableau()
    p
}

matexp <- function(x, m){
    # m-th matrix power: x %*% x %*% ... %*% x (m factors)
    mat <- x
    if(m > 1)
        for(i in 2:m) mat <- mat %*% x
    mat
}

count.run.lengths <- function(sims, count.max = 25, ns = 3){
    nsims <- nrow(sims)
    sim.len <- ncol(sims)
    runs.mat <- matrix(NA, ncol = sim.len, nrow = nsims)
    for(i in 1:nsims){
        runs <- rl.count(sims[i,], ns)
        runs.mat[i, 1:length(runs$rl)] <- runs$rl
    }
    runs.mat
}

sim.stats <- function(sims, x, count.max = 35, ns = 3, wavelet.res = 115){
    require(sm)      # for sm.density()
    require(e1071)   # for skewness()
    wavelet.res <- length(wavelet(sims[1,], pad = 1, dj = .05)$scale)
    nsims <- nrow(sims)
    sim.len <- ncol(sims)
    stats <- list()
    stats$runs <- stats$runs.upper <- stats$runs.lower <-
        matrix(NA, ncol = sim.len, nrow = nsims)
    stats$pdf.x <- seq(from = 0, to = max(x)*1.25, len = 50)
    stats$pdf <- matrix(NA, ncol = length(stats$pdf.x), nrow = nsims)
    stats$spec <- stats$freq <-
        matrix(NA, ncol = length(spectrum(sims[1,], plot = F)$spec), nrow = nsims)
    stats$dens <- stats$dens.lower <- stats$dens.upper <-
        matrix(NA, ncol = 512, nrow = nsims)
    stats$dens.x <- stats$dens.x.lower <- stats$dens.x.upper <- numeric(512)
    stats$counts <- matrix(0, ncol = count.max, nrow = nsims)
    stats$pow <- matrix(0, ncol = wavelet.res, nrow = nsims)
    pb <- txtProgressBar(1, nsims, style = 3)
    for(i in 1:nsims){
        #cat(i,'\n')
        setTxtProgressBar(pb, i)
        runs <- rl.count(sims[i,], ns)
        runs.upper <- rl.count.upper(sims[i,], ns)
        runs.lower <- rl.count.lower(sims[i,], ns)
        #if(i==32)browser()
        stats$runs[i, 1:length(runs$rl)] <- runs$rl
        stats$runs.upper[i, 1:length(runs.upper$rl)] <- runs.upper$rl
        stats$runs.lower[i, 1:length(runs.lower$rl)] <- runs.lower$rl
        d <- density(stats$runs[i,], bw = 1, from = 1, to = count.max, na.rm = T)
        stats$dens[i,] <- d$y
        d <- density(stats$runs.lower[i,], bw = 1, from = 1, to = count.max, na.rm = T)
        stats$dens.lower[i,] <- d$y
        d <- density(stats$runs.upper[i,], bw = 1, from = 1, to = count.max, na.rm = T)
        stats$dens.upper[i,] <- d$y
        d.sm <- sm.density(sims[i,], display = 'none', eval.points = stats$pdf.x)
        stats$pdf[i,] <- d.sm$estimate
        stats$counts[i, runs$len] <- runs$count
        stats$pow[i,] <- apply(wavelet(sims[i,], pad = 1, dj = .05)$power, 1, sum)
        #browser()
        spec <- spectrum(sims[i,], plot = F)
        stats$spec[i,] <- spec$spec
        stats$freq[i,] <- spec$freq
    }
    close(pb)
    stats$dens.x <- d$x
    stats$mean <- apply(sims, 1, mean)
    stats$sd <- apply(sims, 1, sd)
    stats$skew <- apply(sims, 1, skewness)
    stats$min <- apply(sims, 1, min)
    stats$max <- apply(sims, 1, max)
    stats$lag1 <- apply(sims, 1, mylag, 1, docor = T)
    stats$lag2 <- apply(sims, 1, mylag, 2, docor = T)
    stats
}

knn.sim <- function(x, sim.len = length(x), nn = floor(sqrt(length(x)))){
    W <- cumsum(oneoverk(nn))
    sim <- numeric(sim.len)
    ll <- laglead(x, 1, 1)
    # random starting year
    this.neighbor <- sample(1:length(x), 1)
    sim[1] <- x[this.neighbor]
    for(i in 2:sim.len){
        # neighbors to the current value;
        # don't use the last value because there is no following value
        this.x <- ll$lag[!(ll$lag == sim[i-1])]
        xsample <- c(sim[i-1], this.x)
        neighbors <- order(as.matrix(dist(xsample))[,1])[-1][1:nn]
        r1 <- runif(1)
        this.neighbor <- neighbors[rank(c(r1, W))[1]]
        # choose the value following the selected neighbor
        val <- xsample[this.neighbor]
        if(is.na(val)) browser()
        sim[i] <- ll$lead[ll$lag == val]
    }
    sim
}

statdist <- function(tpm){
    # stationary distribution of a transition probability matrix:
    # delta = 1' (I - Gamma + U)^(-1)
    m <- nrow(tpm)
    ones <- rbind(rep(1, m))
    I <- diag(rep(1, m))
    U <- matrix(rep(1, m^2), m)
    as.vector(ones %*% solve(I - tpm + U))
}

BIC.dthmm <- function(x){
    ## Return the Bayesian Information Criterion value for a fitted discrete
    ## time hidden Markov model from the HiddenMarkov package
    # Model order
    m <- length(x$delta)
    # Length of data
    T <- length(x$x)
    # Log likelihood value
    LL <- x$LL
    # number of parameters
    p <- m + m^2
    # BIC
    -2*LL + p*log(T)
}

AIC.dthmm <- function(x){
    ## Return the Akaike Information Criterion value for a fitted discrete
    ## time hidden Markov model from the HiddenMarkov package
    # Model order
    m <- length(x$delta)
    # Log likelihood value
    LL <- x$LL
    # number of parameters
    p <- m + m^2
    # AIC
    -2*LL + 2*p
}

AICc.dthmm <- function(x){
    ## Return the small-sample corrected Akaike Information Criterion value
    ## for a fitted discrete time hidden Markov model from the HiddenMarkov
    ## package
    aic <- AIC(x)
    # Model order
    m <- length(x$delta)
    T <- length(x$x)
    # Log likelihood value
    LL <- x$LL
    # number of parameters
    p <- m + m^2
    # AICc
    aic + 2*p*(p+1)/(T-p-1)
}

dthmm.gamma.mean <- function(x){
    sum(statdist(x$Pi)*x$pm$shape/x$pm$rate)
}

dthmm.gamma.var <- function(x){
    mu <- dthmm.gamma.mean(x)
    sum(statdist(x$Pi) * ((x$pm$shape/x$pm$rate - mu)^2 + x$pm$shape/x$pm$rate^2))
}

dthmm.gamma.skew <- function(x){
    mu <- dthmm.gamma.mean(x)
    sd <- sqrt(dthmm.gamma.var(x))
    sd^(-3)*sum(statdist(x$Pi) * ((x$pm$shape/x$pm$rate - mu)^3 +
        3*x$pm$shape/x$pm$rate^2*(x$pm$shape/x$pm$rate - mu)))
}

dthmm.gamma.acf <- function(x, lag.max = 10){
    delta <- statdist(x$Pi)
    mu <- x$pm$shape/x$pm$rate
    acf <- numeric(lag.max)
    for(k in 1:lag.max)
        acf[k] <- (rbind(delta) %*% diag(mu) %*% matexp(x$Pi, k) %*% cbind(mu) -
                   sum(delta * mu)^2)/dthmm.gamma.var(x)
    acf
}

intpdf <- function(v, x, y = NULL){
    # integrate a piecewise-linear pdf given by (x, y) from min(x) up to v
    if(is.list(x)){
        y <- as.vector(x$y)
        x <- as.vector(x$x)
    }
    fun <- approxfun(x = x, y = y)
    lower <- min(x)
    upper <- v
    if(lower > upper) 0
    else integrate(fun, lower, upper, subdivisions = 1000)$value
}

median.dist <- function(x, y = NULL){
    # median of a distribution given by support x and density y
    if(is.list(x)){
        y <- as.vector(x$y)
        x <- as.vector(x$x)
    }
    ic <- mean(x)
    lower <- min(x)
    upper <- ic
    f <- function(v, x, y){
        abs(intpdf(v, x, y) - 1/2)
    }
    optimize(f, upper = upper, lower = lower, x = x, y = y)$minimum
}

pc2in <- function(x){
    # picas to inches (1 pica = 12 pt, 72.27 pt per inch)
    x*12/72.27
}
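## Ex. (stationary distribution of a 2-state transition matrix):
##
##   P <- matrix(c(.9, .1,
##                 .2, .8), 2, byrow = TRUE)
##   statdist(P)          # long-run state probabilities (2/3, 1/3)
##   statdist(P) %*% P    # unchanged by a further transition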
hmm.forecast.traces <- function(x, H, len){
    n <- length(x$x)
    ts <- x$x
    traces <- matrix(NA, nrow = len, ncol = H)
    for(i in 1:len){
        for(j in 1:H){
            mn <- eval(call(paste('hmm.mean', x$distn, sep = '.'), x$pm))
            state <- if(j == 1) which.min(abs(mn - ts[length(ts)])) else trans.state
            trans.state <- rank(c(runif(1), cumsum(x$Pi[state,])))[1]
            traces[i,j] <- do.call(paste('r', x$distn, sep = ''),
                                   c(n = 1, lapply(x$pm, '[[', trans.state)))
        }
    }
    traces
}

hmm.mean.gamma <- function(x){
    x$shape/x$rate
}

hmm.mean.norm <- function(x){
    x$mean
}

# returns the best alpha and degree
best_locfit_par <- function(formula, data, a = seq(0.2, 1.0, by = 0.05),
        n = length(a), f = c(gcvplot, aicplot), ...){
    require(locfit)
    # get the gcv values for all combinations of deg and alpha
    d1 <- f(formula, data = data, deg = 1, alpha = a, kern = 'bisq', scale = T, ...)
    d2 <- f(formula, data = data, deg = 2, alpha = a, kern = 'bisq', scale = T, ...)
    gcvs <- c(d1$values, d2$values)
    best <- order(gcvs)[1]
    # get the best alpha and degree
    bestd <- c(rep(1, n), rep(2, n))[best]
    bestalpha <- c(a, a)[best]
    return(list(p = bestd, a = bestalpha, gcv = gcvs[best]))
}

Pi_init <- function(n, type = 'uniform'){
    matrix(rep(1/n, n^2), n)
}

delta_init <- function(n, type = 'uniform'){
    d <- rnorm(n)^2
    d/sum(d)
}

sim_hmm <- function (object, nsim = 1, states = NULL, seed = NULL, ...)
{
    require(HiddenMarkov)
    if (!is.null(seed))
        set.seed(seed)
    y <- if(is.null(states))
        simulate(mchain(NULL, object$Pi, round(object$delta)), nsim)$mc
    else states
    rname <- paste("r", object$distn, sep = "")
    x <- rep(NA, nsim)
    for (i in 1:nsim) {
        x[i] <- do.call(rname, c(list(n = 1),
                                 HiddenMarkov:::getj(object$pm, y[i]),
                                 HiddenMarkov:::getj(object$pn, i)))
    }
    object$x <- x
    object$y <- y
    return(object)
}

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
    require(grid)
    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)
    numPlots = length(plots)
    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # Make the panel
        # ncol: Number of columns of plots
        # nrow: Number of rows needed, calculated from # of cols
        layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                         ncol = cols, nrow = ceiling(numPlots/cols))
    }
    if (numPlots == 1) {
        print(plots[[1]])
    } else {
        # Set up the page
        grid.newpage()
        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
        # Make each plot, in the correct location
        for (i in 1:numPlots) {
            # Get the i,j matrix positions of the regions that contain this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
            print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
        }
    }
}

winter_mean_from_monthly_mat <- function(d.mat, name = 'value'){
    require(data.table)
    require(reshape2)
    require(plyr)
    require(lubridate)
    names(d.mat) <- c('year', 1:12)
    d.mat$year <- factor(d.mat$year)
    d.dt <- as.data.table(melt(d.mat, variable.name = "month"))
    d.dt[, date := as.Date(sprintf('%s-%s-01', as.character(year), month))]
    season <- function(x, winter = c(11,12,1,2,3))
        ifelse(lubridate:::month(x) %in% winter, 'winter', 'summer')
    wy <- function(x, start = 10) ifelse(month(x) >= start, year(x) + 1, year(x))
    d.dt[, season := season(date)]
    d.dt[, wyear := wy(date)]
    setkey(d.dt, season)
    d <- as.data.table(ddply(d.dt['winter'], .(wyear), summarize, value = mean(value)))
    setnames(d, 'wyear', 'date')
    setnames(d, 'value', name)
    d
}

dlmregression <- function(X1, Y1, times, pnames = NULL){
    require(data.table)
    require(reshape2)
    require(dlm)
    nd = nrow(X1)
    np = ncol(X1)+1
    test_lm = lm(Y1~X1)
    coef = summary(test_lm)$coefficients
    dV = var(test_lm$resid)
    dW = rep(dV/10, np)
    m0 = coef[,1]
    C0 = coef[,2]
    mod1 = dlmModReg(X1, dV = dV, dW = dW, m0 = m0, C0 = matrix(C0, np, np, byrow = T))
    modtimeseries = matrix(0, length(Y1), 9)
    smoothmod = dlmSmooth(Y1, mod1)
    smoothvar = dlmSvd2var(smoothmod$U.S, smoothmod$D.S)
    pars = list()
    for(i in 1:np){
        nm <- ifelse(is.null(pnames), paste0('B', i-1), pnames[i])
        pars[[i]] = data.table(time = times, parameter = nm,
                               expected = numeric(nd), ul = numeric(nd), ll = numeric(nd))
        pars[[i]]$expected = smoothmod$s[-1, i]
        sds = sapply(lapply(smoothvar, diag), '[', i)[-1]
        pars[[i]]$ul = pars[[i]]$expected + qnorm(0.75, sd = sds)
        pars[[i]]$ll = pars[[i]]$expected + qnorm(0.25, sd = sds)
    }
    pars <- do.call('rbind', pars)
    return(data.table(melt(pars, id.vars = c('time', 'parameter'))))
}

predict_plus_se_sample <- function(model, newdata, nsims){
    fit <- predict(model, newdata, se.fit = TRUE)
    sims <- fit$fit + rnorm(sd = fit$se.fit, n = nsims)
    return(sims)
}

lagged_dt <- function(dt, nlags = 1, timecol = 'date', datacol = NULL){
    require(data.table)
    nm <- names(dt)
    datacol <- ifelse(is.null(datacol), all_others(nm, timecol)[1], datacol)
    lagdata <- dt[[datacol]]
    lagdt <- as.data.table(matrix(NA, ncol = nlags, nrow = length(lagdata)))
    setnames(lagdt, paste0(datacol, '_lag', 1:nlags))
    for(i in 1:nlags)
        lagdt[, i] <- mylag(lagdata, -i)
    lagdt[[timecol]] <- dt[[timecol]]
    return(na.omit(merge(dt, lagdt, by = timecol)))
}

all_others <- function(x, besides){
    x[!(x %in% besides)]
}
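## Ex. (append lagged predictor columns to a table; made-up data):
##
##   require(data.table)
##   d <- data.table(date = 1:10, flow = rnorm(10))
##   lagged_dt(d, nlags = 2)   # adds flow_lag1 and flow_lag2, drops NA rows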
block_hmm_sim <- function(hmm, statepred, nblocks, sim_length){
    np <- length(statepred)
    base_block_size <- floor(np/nblocks)
    nblocks_sim <- floor(sim_length/base_block_size)
    block_size <- rep(base_block_size, floor(sim_length/base_block_size))
    if(sim_length %% base_block_size != 0){
        block_size[nblocks_sim+1] <- sim_length %% base_block_size
        nblocks_sim <- nblocks_sim + 1
    }
    sims <- numeric(0)
    for(i in 1:nblocks_sim){
        start_ind <- sample(1:(np-block_size[i]+1), 1)
        #print(start_ind)
        block_ind <- start_ind:(start_ind+block_size[i]-1)
        statepred_block <- statepred[block_ind]
        sim_states <- ifelse(runif(block_size[i]) > statepred_block, 2, 1)
        #sim_states <- ifelse(statepred_block > .5, 2, 1)
        #browser()
        sims <- c(sims, sim_hmm(hmm, nsim = block_size[i], states = sim_states)$x)
    }
    sims
}

wavelet_power <- function(x){
    return(apply(wavelet(cbind(x), pad = 1, dj = .05)$power, 1, sum))
}

wavelet_scale <- function(x){
    return(wavelet(cbind(x), pad = 1, dj = .05)$scale)
}

wavelet_default <- function(x){
    return(wavelet(cbind(x), pad = 1, dj = .05))
}

wavelet_dt <- function(x, timelab = time(x)){
    require(data.table)
    require(reshape2)
    w <- wavelet(cbind(x), pad = 1, dj = .05)
    len <- length(w$scale)
    rownames(w$power) <- w$scale
    colnames(w$power) <- timelab
    dt <- data.table(melt(w$power))
    setnames(dt, c('scale', 'time', 'power'))
    return(dt)
}

wavelet_array <- function(x, timelab = time(x)){
    w <- wavelet(cbind(x), pad = 1, dj = .05)
    len <- length(w$scale)
    rownames(w$power) <- w$scale
    colnames(w$power) <- timelab
    return(w$power)
}

local_wavelet_sim <- function(sims, fun = mean){
    ## NOTE: this appears to be an unfinished draft; it mixes a parallel
    ## wavelet computation with PAM clustering of an object (dtmax_ave)
    ## that must exist in the calling environment. The body below is a
    ## syntactically valid arrangement of the original pieces.
    require(parallel)
    require(cluster)
    # set up a parallel "cluster" on this machine, each core is a node
    cl <- makeCluster(getOption("cl.cores", 2))
    on.exit(stopCluster(cl))
    # wavelet transform of one series (unused in the draft)
    wav <- function(i, d) wavelet(cbind(d), pad = 1, dj = .05)
    # PAM clustering for a given number of clusters k
    pamf <- function(k, d) cluster::pam(d, k, stand = TRUE)
    # do the clustering over a range of cluster counts
    k <- 4:15
    pamx_ave <- clusterApply(cl, k, pamf, dtmax_ave)
    pamx_max <- clusterApply(cl, k, pamf, dtmax_ave$max)
    list(ave = pamx_ave, max = pamx_max)
}

wavelet_plyr_wrapper <- function(x, timelab = NULL){
    if(is.null(timelab)) timelab = 1:nrow(x)
    wav <- wavelet_dt(x$value, timelab = timelab)
    wav$kind <- x$kind[1]
    wav$sim <- x$sim[1]
    wav
}

wavelet_plyr_array <- function(x, timelab = NULL){
    if(is.null(timelab)) timelab = 1:nrow(x)
    wavelet_array(x$value, timelab = timelab)
}

wavelet_d_ply_wrapper <- function(x, con, timelab = NULL){
    if(is.null(timelab)) timelab = 1:nrow(x)
    wav <- wavelet_dt(x$value, timelab = timelab)
    wav$kind <- x$kind[1]
    wav$sim <- x$sim[1]
    dbWriteTable(con, 'wavelet', wav, append = TRUE, row.names = FALSE)
}

print_flush <- function(x){
    cat(x, '\n')
    flush.console()
}
printf <- print_flush

ggsave_tikz <- function (filename = default_name(plot), plot = last_plot(),
    device = default_device(filename), path = NULL, scale = 1,
    width = par("din")[1], height = par("din")[2],
    units = c("in", "cm", "mm"), dpi = 300, limitsize = TRUE, ...)
{
    if (!inherits(plot, "ggplot"))
        stop("plot should be a ggplot2 plot")
    tikz <- function(..., width, height)
        tikzDevice::tikz(..., width = width, height = height)
    default_name <- function(plot) {
        paste(digest.ggplot(plot), ".tex", sep = "")
    }
    default_device <- function(filename) {
        tikz
    }
    units <- match.arg(units)
    convert_to_inches <- function(x, units) {
        x <- switch(units, `in` = x, cm = x/2.54, mm = x/2.54/10)
    }
    convert_from_inches <- function(x, units) {
        x <- switch(units, `in` = x, cm = x * 2.54, mm = x * 2.54 * 10)
    }
    if (!missing(width)) {
        width <- convert_to_inches(width, units)
    }
    if (!missing(height)) {
        height <- convert_to_inches(height, units)
    }
    if (missing(width) || missing(height)) {
        message("Saving ",
            prettyNum(convert_from_inches(width * scale, units), digits = 3),
            " x ",
            prettyNum(convert_from_inches(height * scale, units), digits = 3),
            " ", units, " image")
    }
    width <- width * scale
    height <- height * scale
    if (limitsize && (width >= 50 || height >= 50)) {
        stop("Dimensions exceed 50 inches (height and width are specified in inches/cm/mm, not pixels).",
            " If you are sure you want these dimensions, use 'limitsize=FALSE'.")
    }
    if (!is.null(path)) {
        filename <- file.path(path, filename)
    }
    device(file = filename, width = width, height = height, ...)
    on.exit(capture.output(dev.off()))
    print(plot)
    invisible()
}
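## Ex. (save a ggplot as a TikZ .tex file; a minimal sketch assuming the
## tikzDevice package is installed; width/height are in inches):
##
##   require(ggplot2)
##   p <- qplot(1:10, rnorm(10))
##   ggsave_tikz('plot.tex', p, width = 5, height = 3)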