functions {

  real gev_log (real y, real loc, real scale, real shape){
    real z;
    real inv_shape;
    inv_shape <- 1.0/shape;
    z <- 1 + (y - loc) * shape / scale;
    return -log(scale) - (1 + inv_shape)*log(z) - pow(z,-inv_shape);  
  }

  real gev_v_log (vector y, real loc, real scale, real shape){
    real inv_shape;
    real inv_shape_p1;
    real neg_inv_shape;
    vector[rows(y)] z;
    vector[rows(y)+1] lp;
    int N;

    N <- rows(y);

    if(shape == 0){
      z <- (y - loc)/scale;
      lp <- log(scale) + z + exp(-z);
    }else{
      inv_shape <- 1.0/shape;
      inv_shape_p1 <- inv_shape + 1;
      neg_inv_shape <- -inv_shape;

      z <- 1 + (y - loc) * shape / scale;
      
      for(n in 1:N){
        lp[n] <-  inv_shape_p1*log(z[n]) + pow(z[n],neg_inv_shape);
      }
      lp[N+1] <- log(scale) * N;
    }
    return -sum(lp);  
  }
  
  vector gev_v_log_v (vector y, real loc, real scale, real shape){
    real inv_shape;
    real inv_shape_p1;
    real neg_inv_shape;
    vector[rows(y)] z;
    vector[rows(y)] lp;
    int N;

    N <- rows(y);

    if(shape == 0){
      z <- (y - loc)/scale;
      lp <- log(scale) + z + exp(-z);
    }else{
      inv_shape <- 1.0/shape;
      inv_shape_p1 <- inv_shape + 1;
      neg_inv_shape <- -inv_shape;

      z <- 1 + (y - loc) * shape / scale;
      
      for(n in 1:N){
        lp[n] <- log(scale) + inv_shape_p1*log(z[n]) + pow(z[n],neg_inv_shape);
      }
    }
    return -lp;  
  }

  ## @logf  log of the GEV probability density for all sites at a given time
  ## @F     GEV cumulative distribution for all sites at a given time
  ## @L     Cholesky factor of the dependogram matrix
  ## @zeros vector of zeros (used for speed)
  real eliptical_copula_log (vector logf, vector F, matrix L, vector zeros){
    real lp;
    vector[rows(logf)] u;
    vector[rows(logf)] log_phi_u;
    int N;

    N <- rows(logf);

    for(n in 1:N){
      u[n] <- inv_Phi(F[n]);
      log_phi_u[n] <- normal_log(u[n], 0, 1);
    }

    lp <- sum(logf) - sum(log_phi_u) + multi_normal_cholesky_log(u, zeros, L);
    return lp;  
  }

  ## @logf  log of the GEV probability density for all sites at a given time
  ## @F     GEV cumulative distribution for all sites at a given time
  ## @L     Cholesky factor of the dependogram matrix
  ## @zeros vector of zeros (used for speed)
  real eliptical_copula_log_v (matrix logf, matrix F, matrix L, vector zeros){
    vector[dims(F)[2]] lp;
    vector[dims(F)[1]] u;
    vector[dims(F)[1]] log_phi_u;
    int N;
    int T;

    # logf and F are N x T matricies
    N <- dims(F)[1];
    T <- dims(F)[2];

    for(t in 1:T){
      for(n in 1:N){
        u[n] <- inv_Phi(F[n,t]);
        log_phi_u[n] <- normal_log(u[n], 0, 1);
      }

      //print("sum(col(logf,t)) ",sum(col(logf,t)));
      //print("sum(log_phi_u)",sum(log_phi_u));
      //print("multi normal ", multi_normal_cholesky_log(u, zeros, L));

      lp[t] <- sum(col(logf,t)) - sum(log_phi_u) + multi_normal_cholesky_log(u, zeros, L);
    }
    return sum(lp);  
  }

  vector gev_v_cdf_v (vector q, real loc, real scale, real shape) {
    
    vector[rows(q)] p;
    vector[rows(q)] r; 
    real neg_inv_shape;
    real ss;

    neg_inv_shape <- -1.0/shape;
    ss <- shape/scale;
    r <- 1 +  ss * (q - loc);

    if(shape == 0.0){
        p <- exp(-exp(-(q-loc)/scale));
    }else{
      for(n in 1:rows(q))
        p[n] <- exp(-pow(fmax(r[n],0),neg_inv_shape));
    }
    return p;
  }

  real gev_cdf (real q2, real loc, real scale, real shape) {
      real q;
      real p;
      real r; 
      q <- (q2 - loc)/scale;
      if (shape == 0){ 
          p <- exp(-exp(-q));
      }else{
        r <- 1 + shape * q;
        if(r < 0) r <- 0.0;
        p <- exp(-pow(r,-1/shape));
      }
      return p;
  }

  matrix dependogram (matrix x, real phi0, real phi1, real phi2){

    int S;
    matrix[dims(x)[1],dims(x)[1]] cov;

    S <- dims(x)[1];


    // off-diagonal elements
    for (i in 1:(S-1)) {
      for (j in (i+1):S) {

        cov[i,j] <- phi0 * exp(-1.0/phi1 * x[i,j]) + (1.0-phi0) * exp(-1.0/phi2 * x[i,j]);
        cov[j,i] <- cov[i,j];
      }
    }

    // diagonal elements
    for (k in 1:S)
      cov[k,k] <- 1.0;

    return cov;

  }

  matrix dependogram_exp (matrix x, real range){

    int S;
    real neg_inv_range;
    matrix[dims(x)[1],dims(x)[1]] cov;

    S <- dims(x)[1];

    neg_inv_range <- -1.0/range;

    // off-diagonal elements
    for (i in 1:(S-1)) {
      for (j in (i+1):S) {

        cov[i,j] <- exp(neg_inv_range * x[i,j]);
        cov[j,i] <- cov[i,j];
      }
    }

    // diagonal elements
    for (k in 1:S)
      cov[k,k] <- 1;

    return cov;

  }

  matrix exp_cov_matrix (matrix x, real sill, real range, real nugget){

    int S;
    matrix[dims(x)[1],dims(x)[1]] cov;
    real neg_inv_range;

    S <- dims(x)[1];
    neg_inv_range <- -1.0/range;

    // off-diagonal elements
    for (i in 1:(S-1)) {
      for (j in (i+1):S) {
        # beta0 = partial sill = scale^2
        # beta1 = range = phi
        cov[i,j] <- sill * exp(neg_inv_range * x[i,j]);
        cov[j,i] <- cov[i,j];
      }
    }

    // diagonal elements
    for (k in 1:S)
      cov[k,k] <- sill + nugget;

    return cov;

  }
}

data {
  int<lower=1> S;   // number of total locations
  int<lower=1> S_c; // number of complete locations
  int<lower=1> S_m; // number of missing locations
  int<lower=1> T;   // number of data points
  int<lower=1> G;   // number of total composite groups
  int<lower=1> G_c; // number of complete composite groups
  int<lower=1> G_m; // number of composite groups stations with missing data
  int<lower=1> R;   // number of elements in each group
  int<lower=1> P;  // number of space predictors (including intercept)
  int<lower=1> K;   // number of regression coefficient knots

  # y must have the first (S_c) columns with complete data
  # the last S_m columns can have -99 for missing
  # both groups should be evenly divisible by R
  matrix[T,S] y;    // observations
  matrix[S,S] dist; //distance matrix for stations 
  matrix[S,K] dist_knots_to_points; 

  matrix[S,P] covars;

  real mle_cop_range;

}

transformed data {

  vector[R] zero_vector_r;
  matrix[P,1] ones_vector_p;
  vector[S] y_t[T];
  vector[T] y_s[S];

  real max_y;
  real max_dist;
  real half_max_dist;
  real twice_max_dist;
  real ten_times_max_dist;
  real onehundred_times_max_dist;
  real onethousand_times_max_dist;
  real sq_max_dist;

  int start;
  int end;
  int a;
  int b;

  matrix[R,R] dist_mats[G];
  matrix[S,K] dist_knots_to_points_sq;

  # compute smaller distance matricies based on groups
  for(g in 1:G){
    start <- (g-1) * R + 1;
    end <- g * R;
    a <- 0;
    for(i in start:end){
      a <- a + 1;
      b <- 0;
      for(j in start:end){
        b <- b + 1;
        dist_mats[g,a,b] <- dist[i,j];
      }
    }
  }
  
  for(s in 1:S)
    for(t in 1:T){
      y_t[t,s] <- y[t,s];
      y_s[s,t] <- y[t,s];
    }

  for (r in 1:R) 
    zero_vector_r[r] <- 0;

  for (p in 1:P) 
    ones_vector_p[p,1] <- 1.0;

  max_dist <- max(dist);
  twice_max_dist <- 2*max_dist;
  ten_times_max_dist <- 10*max_dist;
  onehundred_times_max_dist <- 100*max_dist;
  onethousand_times_max_dist <- 1000*max_dist;
  sq_max_dist <- pow(max_dist,3.0);

  for(s in 1:S){
    for(k in 1:K){
      dist_knots_to_points_sq[s,k] <- pow(dist_knots_to_points[s,k],2);
    }
  }

}

parameters {

  vector<lower=0,upper=50>[S] loc;
  vector<lower=0,upper=50>[S] scale;
  vector<lower=-0.5,upper=0.5>[S] shape;

  matrix<lower=-20,upper=20>[P,K] basis_loc;
  matrix<lower=-20,upper=20>[P,K] basis_scale;
  matrix<lower=-20,upper=20>[P,K] basis_shape; 

  real<lower=-50,upper=50> intercept_loc;
  real<lower=-50,upper=50> intercept_scale;
  real<lower=-0.5,upper=0.5> intercept_shape;

  real<lower=0,upper=1> sill_loc;
  real<lower=0,upper=1> sill_scale;
  real<lower=0,upper=1> sill_shape;

  real<lower=0,upper=1> nugget_loc;
  real<lower=0,upper=1> nugget_scale;
  real<lower=0,upper=1> nugget_shape;

  real<lower=0> range_copula;
  real<lower=0> range_loc;
  real<lower=0> range_scale;
  real<lower=0> range_shape;

  matrix<lower=0>[P,K] range_basis_loc;
  matrix<lower=0>[P,K] range_basis_scale;
  matrix<lower=0>[P,K] range_basis_shape; 

} 

model {    

  //vector[R] lp_t[T];
  vector[T] lp_s[R];

  matrix[R,T] lp;
  matrix[R,T] F;

  matrix[R,R] L_copula;

    # holds cumulative gev 
  //vector[R] F_t[T];
  vector[T] F_s[R];

  int ind[R];

  #vector[S] loc_resid;
  #vector[S] scale_resid;
  #vector[S] shape_resid;

  #vector[R] loc_resid_group;
  #vector[R] scale_resid_group;
  #vector[R] shape_resid_group;

  vector[R] loc_group;
  vector[R] scale_group;
  vector[R] shape_group;

  vector[R] loc_mean_group;
  vector[R] scale_mean_group;
  vector[R] shape_mean_group;

  matrix[S,1] loc_mean;
  matrix[S,1] scale_mean;
  matrix[S,1] shape_mean;

  matrix[S,P] beta_loc;
  matrix[S,P] beta_scale;
  matrix[S,P] beta_shape;

  matrix[R,R] cov_loc;
  matrix[R,R] cov_scale;
  matrix[R,R] cov_shape;

  int start2;
  int end2;
  int m;
  real eclp;
  matrix[P,K] neg_inv_range_loc;
  matrix[P,K] neg_inv_range_scale;
  matrix[P,K] neg_inv_range_shape;

  for(p in 1:P){
    for(k in 1:K){
        neg_inv_range_loc[p,k] <- -1.0/(range_basis_loc[p,k] * range_basis_loc[p,k]);
        neg_inv_range_scale[p,k] <- -1.0/(range_basis_scale[p,k] * range_basis_scale[p,k]);
        neg_inv_range_shape[p,k] <- -1.0/(range_basis_shape[p,k] * range_basis_shape[p,k]);
    }
  }

  # compute regression coefficients from basis functions
  for(s in 1:S){
    for(p in 1:P){
        # zero out betas
      beta_loc[s,p] <- 0.0;
      beta_scale[s,p] <- 0.0;
      beta_shape[s,p] <- 0.0;

      # add up all the basis funcitons (gaussian kernals)
      for(k in 1:K){
        beta_loc[s,p] <- beta_loc[s,p] + basis_loc[p,k] * exp(dist_knots_to_points_sq[s,k] * neg_inv_range_loc[p,k]);
        beta_scale[s,p] <- beta_scale[s,p] + basis_scale[p,k] * exp(dist_knots_to_points_sq[s,k] * neg_inv_range_scale[p,k]);
        beta_shape[s,p] <- beta_shape[s,p] + basis_shape[p,k] * exp(dist_knots_to_points_sq[s,k] * neg_inv_range_shape[p,k]);
      }
    }
  }

  # right multiplying by a vector of ones will sum the rows  
  loc_mean <- intercept_loc + (covars .* beta_loc) * ones_vector_p;
  scale_mean <- intercept_scale + (covars .* beta_scale) * ones_vector_p;
  shape_mean <- intercept_shape + (covars .* beta_shape) * ones_vector_p;
  #for(s in 1:S){
  #  loc_mean[s] <- intercept_loc + dot_product(row(covars,s), row(beta_loc,s));
  #  scale_mean[s] <- intercept_scale + dot_product(row(covars,s), row(beta_scale,s));
  #  shape_mean[s] <- intercept_shape + dot_product(row(covars,s), row(beta_shape,s));
  #}

  # loop over groups
  for(g in 1:(G_c+G_m)){

    start2 <- (g-1) * R + 1;
    end2 <- g * R;

    m <- 0;
    for(i in start2:end2){
      m <- m + 1;
      ind[m] <- i;

      loc_mean_group[m] <- loc_mean[i,1];
      scale_mean_group[m] <- scale_mean[i,1];
      shape_mean_group[m] <- shape_mean[i,1];

      loc_group[m] <- loc[i];
      scale_group[m] <- scale[i];
      shape_group[m] <- shape[i];

    }

    cov_loc <- exp_cov_matrix(dist_mats[g], sill_loc, range_loc, nugget_loc);
    cov_scale <- exp_cov_matrix(dist_mats[g], sill_scale, range_scale, nugget_scale);
    cov_shape <- exp_cov_matrix(dist_mats[g], sill_shape, range_shape, nugget_shape);

    loc_group ~ multi_normal(loc_mean_group, cov_loc);
    scale_group ~ multi_normal(scale_mean_group, cov_scale);
    shape_group ~ multi_normal(shape_mean_group, cov_shape);

    # loop over stations in the group
    for(r in 1:R){

      # only stations with complete data contribute to the copula
      if(g <= G_c){
            # marginal gev log probability density, used also in the copula
        lp_s[r] <- gev_v_log_v(y_s[ind[r]], loc[ind[r]], scale[ind[r]], shape[ind[r]]);
        #y_s[ind[r]] ~ gev_v(loc[ind[r]], scale[ind[r]], shape[ind[r]]);
        #print(lp_s[r]);
        #increment_log_prob(sum(lp_s[r]));

          # cumulative gev probability
        F_s[r] <- gev_v_cdf_v(y_s[ind[r]], loc[ind[r]], scale[ind[r]], shape[ind[r]]);
      }else{
        for(t in 1:T)
          if(y_s[ind[r],t] != -99)
            y_s[ind[r],t] ~ gev(loc[ind[r]], scale[ind[r]], shape[ind[r]]);
      }
    }
    
    # only fit copula to complete data
    if(g <= G_c){
      L_copula <- cholesky_decompose(dependogram_exp(dist_mats[g], range_copula));

      # transpose the data so that time is the first index
      for(r in 1:R){
        for(t in 1:T){
          lp[r,t] <- lp_s[r,t];
          F[r,t] <- F_s[r,t];
        }
      }

      #for(t in 1:T){
        eclp <- eliptical_copula_log_v(lp, F, L_copula, zero_vector_r);
        increment_log_prob(eclp);
      #}
    }
  }

  // Priors, fairly diffuse
  intercept_loc ~   normal(0, 10   );
  intercept_scale ~ normal(0, 10   );
  intercept_shape ~ normal(0,  1   );

  range_copula ~ normal(0, 10);
  range_loc ~    normal(0, 10);
  range_scale ~  normal(0, 10);
  range_shape ~  normal(0, 10);

  sill_loc ~   normal(0, 1  );
  sill_scale ~ normal(0, 1  );
  sill_shape ~ normal(0, 0.1);

  nugget_loc ~   normal(0, 1  );
  nugget_scale ~ normal(0, 1  );
  nugget_shape ~ normal(0, 0.1);

  for(p in 1:P){
    basis_loc[p] ~   normal(0, 1);
    basis_scale[p] ~ normal(0, 1);
    basis_shape[p] ~ normal(0, 1);

    range_basis_loc[p] ~   normal(0, 5000);
    range_basis_scale[p] ~ normal(0, 5000);
    range_basis_shape[p] ~ normal(0, 5000);
  }

  
}