# Pohle, J., Langrock, R., van der Schaar, M., King, R. and Jensen, F.H.: #
# A primer on coupled state-switching models for multiple interacting time series #
#''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''#

# R code includes estimation functions for univariate, bivariate and CHMMs for two variables
# with state-dependent normal distributions
# needed to run the simulation experiments described in Section 3 #

# for each model, 5 functions are defined:

# a) n2w: transformation of natural (constrained) parameters into unconstrained parameters
# input: natural parameters which characterise the model
# output: vector of transformed parameters

# b) w2n: back-transformation into natural parameters
# input: vector of transformed parameters as given by n2w function
# output: natural parameters and stationary distribution vector delta

# c) mllk: evaluation of model's minus log-likelihood function
# input: transformed parameter vector, data, number of states and observations
# output: minus log-likelihood value
# C++ function used within mllk to speed up calculation

# d) mle: maximum likelihood estimation using nlm
# input: starting values for natural parameters, data, number of states
# output: maximum likelihood estimates for all model parameters found by nlm,
#       negative maximum log-likelihood value found, nlm characteristics

# e) Viterbi: calculation of Viterbi sequence
# input: corresponding model object as given by mle function, data, number of states
# output: Viterbi sequence


# object names:
# N: (total) number of states
# NpV: number of states per variable, used for CHMM
# mu: vector of mean values of the state-dependent normal distributions
#     for univariate HMMs it contains the mean values for each state, respectively,
#     for bivariate HMMs and CHMMs it contains the mean values for each state and individual, respectively, 
#     ordered as c(mu1_1,...,muNpV_1,mu1_2,...,muNpV_2) with muj_m being the mean value for state j and individual m
# sigma: vector of standard deviations of the state-dependent normal distributions
#     for univariate HMMs it contains the standard deviations values for each state, respectively,
#     for bivariate HMMs and CHMMs it contains the standard deviations values for each state and individual, respectively, 
#     ordered as c(sigma1_1,...,sigmaNpV_1,sigma1_2,...,sigmaNpV_2) with sigmaj_m being the standard deviation for state j and individual m
# gamma: transition probability matrix

# y: data vector
# data: data matrix



##### 1) univariate HMMs with state-dependent normal distribution #####

#a)
n2w_univariate<-function(mu,sigma,gamma,N){
 tsigma<-log(sigma)
 tgamma<-log(gamma/diag(gamma))
 tgamma<-tgamma[!diag(N)]
 parvect<-c(mu,tsigma,tgamma)
 return(parvect)
}

#b)
w2n_univariate<-function(parvect,N){
 mu<-parvect[1:N]
 sigma<-exp(parvect[N+1:N])
 gamma<-diag(N)
 gamma[!gamma]<-exp(parvect[2*N+1:(N*(N-1))])
 gamma<-gamma/apply(gamma,1,sum)
 delta<-solve(t(diag(N)-gamma+1),rep(1,N))
 return(list(mu=mu,sigma=sigma,gamma=gamma,delta=delta))
}

#c)
mllk_univariate <- function(parvect,y,N){
  lpn <- w2n_univariate(parvect,N)
  n <- length(y)
  allprobs <- matrix(1,n,N)
    for (j in 1:N){
      allprobs[,j] <- dnorm(y,lpn$mu[j],lpn$sigma[j])
    }
  foo <- lpn$delta 
  lscale = mllk_Rcpp(allprobs,gamma=lpn$gamma,foo,n)
  return(-lscale)
}

#d)
mle_univariate <- function(y,mu,sigma,gamma,N){
  parvect <- n2w_univariate(mu,sigma,gamma,N)
  mod <- nlm(mllk_univariate,parvect,y,N,print.level=2,iterlim=10000,stepmax=150)
  pn <- w2n_univariate(mod$estimate,N)
  return(list(mu=pn$mu,sigma=pn$sigma,delta=pn$delta,gamma=pn$gamma,mllk=mod$minimum,iterations=mod$iterations,code=mod$code))
}

#e)
viterbi_univariate <-function(y,mod,N){
 n <- length(y)
 allprobs <- matrix(1,n,N)
 for (j in 1:N){
   allprobs[,j] <- dnorm(y,mod$mu[j],mod$sigma[j])
 }
 yi <- matrix(0,n,N)
 foo <- mod$delta*allprobs[1,]
 yi[1,] <- foo/sum(foo)
 for (i in 2:n){
   foo <- apply(yi[i-1,]*mod$gamma,2,max)*allprobs[i,]
   yi[i,] <- foo/sum(foo)
 }
 iv <- numeric(n)
 iv[n] <-which.max(yi[n,])
 for (i in (n-1):1){
   iv[i] <- which.max(mod$gamma[,iv[i+1]]*yi[i,])
 }
 iv
} 



##### 2) bivariate HMMs with state-dependent normal distribution #####
# contemporaneous conditional independence assumption for the two variables observed #

#a)
n2w_bivariate<-function(mu,sigma,gamma,N){
  tsigma<-log(sigma)
  tgamma<-log(gamma/diag(gamma))
  tgamma<-tgamma[!diag(N)]
  parvect<-c(mu,tsigma,tgamma)
  return(parvect)
}

#b)
w2n_bivariate<-function(parvect,N){
  mu<-parvect[1:(N*2)]
  sigma<-exp(parvect[N*2+1:(2*N)])
  gamma<-diag(N)
  gamma[!gamma]<-exp(parvect[4*N+1:(N*(N-1))])
  gamma<-gamma/apply(gamma,1,sum)
  delta<-solve(t(diag(N)-gamma+1),rep(1,N))
  return(list(mu=mu,sigma=sigma,gamma=gamma,delta=delta))
}

#c)
mllk_bivariate <- function(parvect,data,N){
  lpn <- w2n_bivariate(parvect,N)
  n <- dim(data)[1]
  allprobs <- matrix(1,n,N)
  for (j in 1:N){
      allprobs[,j] <- dnorm(data[,1],lpn$mu[j],lpn$sigma[j])*
        dnorm(data[,2],lpn$mu[j+N],lpn$sigma[j+N])
  }
  foo <- lpn$delta  
  lscale = mllk_Rcpp(allprobs,gamma=lpn$gamma,foo,n)
  return(-lscale)
}

#d)
mle_bivariate <- function(data,mu,sigma,gamma,N){
  parvect <- n2w_bivariate(mu,sigma,gamma,N)
  mod <- nlm(mllk_bivariate,parvect,data,N,print.level=2,iterlim=10000,stepmax=150)
  pn <- w2n_bivariate(mod$estimate,N)
  return(list(mu=pn$mu,sigma=pn$sigma,delta=pn$delta,gamma=pn$gamma,mllk=mod$minimum,iterations=mod$iterations,code=mod$code))
}

#e)
viterbi_bivariate <-function(data,mod,N){
  n <- dim(data)[1]
  allprobs <- matrix(1,n,N)
  for (j in 1:N){
    allprobs[,j] <- dnorm(data[,1],mod$mu[j],mod$sigma[j])*
      dnorm(data[,2],mod$mu[j+N],mod$sigma[j+N])
  }
  yi <- matrix(0,n,N)
  foo <- mod$delta*allprobs[1,]
  yi[1,] <- foo/sum(foo)
  for (i in 2:n){
    foo <- apply(yi[i-1,]*mod$gamma,2,max)*allprobs[i,]
    yi[i,] <- foo/sum(foo)
  }
  iv <- numeric(n)
  iv[n] <-which.max(yi[n,])
  for (i in (n-1):1){
    iv[i] <- which.max(mod$gamma[,iv[i+1]]*yi[i,])
  }
  iv
} 


##### 3) CHMMs with state-dependent normal distributions, for M=2 variables #####

#a)
n2w_coupled<-function(mu,sigma,gamma,N){
  tsigma<-log(sigma)
  tgamma<-log(gamma/diag(gamma))
  tgamma<-tgamma[!diag(N)]
  parvect<-c(mu,tsigma,tgamma)
  return(parvect)
}

#b)
w2n_coupled<-function(parvect,NpV,N){
  mu<-parvect[1:(NpV*2)]
  sigma<-exp(parvect[2*NpV+1:(2*NpV)])
  gamma<-diag(N)
  gamma[!gamma]<-exp(parvect[4*NpV+1:(N*(N-1))])
  gamma<-gamma/apply(gamma,1,sum)
  delta<-solve(t(diag(N)-gamma+1),rep(1,N))
  return(list(mu=mu,sigma=sigma,gamma=gamma,delta=delta))
}

#c)
mllk_coupled <- function(parvect,data,NpV,N,ind_NpV){ #ind_NpV: index array for state combinations, defined in mle_coupled function
  lpn <- w2n_coupled(parvect,NpV,N)
  n <- dim(data)[1]
  allprobs <- matrix(1,n,N)
  for(m in 1:2){
    for (j in 1:NpV){
      allprobs[,ind_NpV[,j,m]] <- allprobs[,ind_NpV[,j,m]]*dnorm(data[,m],lpn$mu[(m-1)*NpV+j],lpn$sigma[(m-1)*NpV+j]) 
    }
  }
  foo <- lpn$delta
  lscale = mllk_Rcpp(allprobs,gamma=lpn$gamma,foo,n) 
  return(-lscale)
}

#d)
mle_coupled <- function(data,mu,sigma,gamma,NpV){
  N<-NpV^2
  ind_states<-rev(expand.grid(1:NpV,1:NpV)) #ind_states: order of the state vectors (each row is one state vector)
  ind_NpV<-array(0,dim=c(NpV,NpV,2)) #ind_NpV: index array for state combinations used in mllk function
  for(m in 1:2){
    for(j in 1:NpV)
      ind_NpV[,j,m]<-which(ind_states[,m]==j)
  }
  parvect <- n2w_coupled(mu,sigma,gamma,N)
  mod <- nlm(mllk_coupled,parvect,data,NpV,N,ind_NpV,print.level=2,iterlim=10000,stepmax=150)
  pn <- w2n_coupled(mod$estimate,NpV,N)
  return(list(mu=pn$mu,sigma=pn$sigma,delta=pn$delta,gamma=pn$gamma,mllk=mod$minimum,iterations=mod$iterations,code=mod$code))
}

#e)
viterbi_coupled<-function(data,mod,NpV){
  N<-NpV^2
  ind_states<-rev(expand.grid(1:NpV,1:NpV))
  ind_NpV<-array(0,dim=c(NpV,NpV,2))
  for(m in 1:2){
    for(j in 1:NpV)
      ind_NpV[,j,m]<-which(ind_states[,m]==j)
  }
  n <- dim(data)[1]
  allprobs <- matrix(1,n,N)
  for(m in 1:2){
    for (j in 1:NpV){
      allprobs[,ind_NpV[,j,m]] <- allprobs[,ind_NpV[,j,m]]*dnorm(data[,m],mod$mu[(m-1)*NpV+j],mod$sigma[(m-1)*NpV+j]) 
    }
  }
  yi <- matrix(0,n,N)
  foo <- mod$delta*allprobs[1,]
  yi[1,] <- foo/sum(foo)
  for (i in 2:n){
    foo <- apply(yi[i-1,]*mod$gamma,2,max)*allprobs[i,]
    yi[i,] <- foo/sum(foo)
  }
  iv <- numeric(n)
  iv[n] <-which.max(yi[n,])
  for (i in (n-1):1){
    iv[i] <- which.max(mod$gamma[,iv[i+1]]*yi[i,])
  }
  iv
} 


