# This R script contains the main functions used in Merlo, Maruotti and
# Petrella, "Two-part quantile regression models for semi-continuous 
# longitudinal data: a finite mixture approach", Statistical Modelling.

Balance_Panel = function(Data, Indiv_ColName, Time_ColName){
  Individuals = unique(Data[, get(Indiv_ColName)])
  Times = unique(Data[, get(Time_ColName)])
  
  Full_Panel = data.table(expand.grid(Individuals, Times))
  setnames(Full_Panel, c(Indiv_ColName, Time_ColName))
  setkeyv(Full_Panel, c(Indiv_ColName, Time_ColName))
  setkeyv(Data, c(Indiv_ColName, Time_ColName))
  return(Data[Full_Panel])
}

Strat_sample = function(Y, strata, year, id, n) {
  N = max(id)
  w = table(strata)/length(strata)
  G = length(table(strata))
  var.strata = c()
  for (g in 1:G) {
    var.strata[g] = var(Y[strata == (g - 1)], na.rm = T)
  }
  ng = round(n * (w * var.strata) / sum(w * var.strata), 0)
  id.new = c()
  for (g in 1:G) {
    foo = sample(x = unique(id[which(strata == (g - 1))]), size = ng[g], replace = F)
    id.new = c(id.new, foo)
  }
  id.new = rep(id.new, rep(max(year), length(id.new)))
  return(list(id.new = id.new, ng = ng))
}

require(MASS)
require(gamlss)
require(lqmm)
require(rqPen)
require(matrixStats)
require(Rcpp)
sourceCpp("pos_optim.cpp")
EM_TwoPart_QuantileMixture <- function(Y, XX, K, id, p, iter = 1e3, eps = 1e-05, coef.cutoff = 1e-06, lambda, seed = NULL)
{
  # Input:
  # Y: observations vector
  # XX: fixed covariates matrix
  # K: # of mixture components
  # id: sorted id labels
  # p: quantile level
  # iter: maximum # of iterations
  # eps: convergence threshold
  # coef.cutoff: coefficients with magnitude less than this value are set to 0
  # lambda: tuning parameter for the LASSO penalty
  
  # Output (list):
  # - iter: # of executed iterations
  # - dif: (penalized) log-likelihood difference at convergence
  # - llk: (penalized) log-likelihood at convergence
  # - pigr, beta, b.bin, b.pos, pi, sigma: estimated model parameters
  # - post: posterior weights in the EM algorithm
  # - info.criteria: penalized likelihood criteria (AIC, BIC)
  # - time: required computational time
  
  set.seed(seed)
  N <- length(Y)
  J = N/max(id)
  X = XX
  P = ncol(X)
  D <- Y == 0
  xR <- matrix(rep(t(X),K),ncol=ncol(X),byrow=TRUE)
  W = as.matrix(rep(1, nrow(X)))
  xbin <- cbind(kronecker(diag(1,K),W), xR)
  yR <- rep(Y,K)
  dR <- rep(D,K)
  w <- matrix(runif(length(yR)),ncol=K)
  w <- w/apply(w,1,sum)
  binary <- gamlss(dR ~ xbin-1, family = "BI", weights = as.vector(w), control = gamlss.control(trace=FALSE))
  pigr=coef(binary)[-c(1:K)]
  b.bin=coef(binary)[1:K]
  yL=log(yR)
  yL[yL==-Inf]=NA
  # positive <- rq.lasso.fit(x = xbin, y = yL, tau = p, lambda = lambda, weights = as.vector(w), intercept = F, coef.cutoff = coef.cutoff, penVars = c((1+K:(P+K-1))), method = "fn")
  positive <- rq(yL ~ xbin-1, tau = p, weights = as.vector(w), method = "fn", na.action = na.omit)
  beta=coef(positive)[-c(1:K)]
  b.pos=coef(positive)[1:K]
  
  sigma = mean(check(positive$residuals), na.rm = T)
  pi.vec=gauss.quad.prob(n = K, dist = "normal")$weights
  pi.vec=runif(K)
  pi.vec=pi.vec/sum(pi.vec)
  
  err <- 0
  dif <- Inf
  t.iter <- 0
  llkold <- -10^250
  
  mubin <- 1/(1+exp(-(matrix(rep(X%*%pigr,each=K), ncol=K, byrow=TRUE) + W%*%b.bin)))
  fden = (mubin)^D * ((1-mubin) * dal(log(Y), mu=matrix(rep(X%*%beta,each=K), ncol=K, byrow=TRUE) + W%*%b.pos, sigma=sigma, tau=p))^(1 - D)
  fden[is.na(fden)] = 1
  
  fprod <- apply(fden, 2, function(x) colProds(matrix(x, nrow = J, ncol = max(id))))
  
  t0 = Sys.time()
  while (dif > err & t.iter < iter) {
    t.iter <- t.iter + 1
    
    post <- fprod%*%diag(pi.vec)
    post <- post/apply(post, 1, sum)
    
    #M-step
    pi.vec.new=apply(post, 2, sum)/sum(apply(post, 2, sum))
    
    postTR <- rep(c(post), rep(table(id), K))
    
    binary <- gamlss(dR ~ xbin-1, family = "BI", weights = as.vector(postTR), control = gamlss.control(trace=FALSE))
    pigr.new=coef(binary)[-c(1:K)]
    b.bin.new=coef(binary)[1:K]
    
    yTR.pos=cbind(yR[yR>0], xbin[yR>0,], postTR[yR>0])
    yTR.pos=yTR.pos[complete.cases(yTR.pos),]
    
    lyTR.pos=log(yTR.pos[,1])
    xTR.pos=yTR.pos[,(2+K):(dim(yTR.pos)[2]-1)]
    zTR.pos=(yTR.pos[,2:(1+K)])
    wTR.pos=as.vector(yTR.pos[,dim(yTR.pos)[2]])
    
    # alternative methods to update the positive model parameters
    # method 1
    out=optim(par = c(b.pos, beta, log(sigma)), fn = beta_pos_optim, lambda=lambda, control = list(maxit = 1e5),
             Y=lyTR.pos, X=xTR.pos, W=zTR.pos, z=wTR.pos, K=K, tau=p, N=nrow(yTR.pos), P=P, method = "Nelder-Mead")
    out$par[abs(out$par) < coef.cutoff] = 0
    beta.new=out$par[-c(1:K, length(out$par))]
    b.pos.new=out$par[1:K]
    sigma.new=exp(out$par[length(out$par)])
    
    # method 2
    # out = list()
    # out$par = LASSO.fit.nonpen(x = xTR.pos, y = lyTR.pos, z = zTR.pos, tau = p, intercept = F, lambda = lambda, weights = wTR.pos, coef.cutoff = coef.cutoff)
    # beta.new = out$par[1:ncol(X)]
    # b.pos.new = out$par[-c(1:ncol(X))]
    # sigma.new = sum(wTR.pos*check(lyTR.pos - xTR.pos%*%c(beta.new) - zTR.pos%*%c(b.pos.new), tau = p), na.rm = T)/sum(wTR.pos, na.rm = T)
    
    mubin <- 1/(1+exp(-(matrix(rep(X%*%pigr.new,each=K), ncol=K, byrow=TRUE) + W%*%b.bin.new)))
    fden = (mubin)^D * ((1-mubin) * dal(log(Y), mu=matrix(rep(X%*%beta.new,each=K), ncol=K, byrow=TRUE) + W%*%b.pos.new, sigma=sigma.new, tau=p))^(1 - D)
    fden[is.na(fden)] = 1
    
    fprod <- apply(fden, 2, function(x) colProds(matrix(x, nrow = J, ncol = max(id))))
    
    llk <- sum(log(fprod%*%pi.vec.new)) - lambda * sum(abs(beta.new))
    dif <- llk - llkold
    #dif <- sum((c(pigr.new, b.bin.new, beta.new, b.pos.new, pi.vec.new) - c(pigr, b.bin, beta, b.pos, pi.vec))^2)
    err = eps*(t.iter > 5) + 1e-20*(t.iter < 5)
    ##################################
    pigr=pigr.new
    b.bin=b.bin.new
    beta=beta.new
    b.pos=b.pos.new
    pi.vec=pi.vec.new
    sigma=sigma.new
    llkold <- llk
    
    print(round(c(t.iter, dif, llk, beta, sigma, lambda), 3))
  }
  c.time = Sys.time() - t0
  
  #info.criteria
  beta.new[abs(beta.new) < coef.cutoff] = 0
  P.pos = sum(beta.new != 0)
  n.par = P + P.pos + 1 + 2 * K + K - 1
  aic.crit=-2*llk + 2*n.par
  bic.crit=-2*llk + log(max(id))*n.par
  crit=c(aic.crit, bic.crit)
  names(crit)=c("AIC", "BIC")
  
  return(list(iter=t.iter, dif=dif, llk=llk, pigr=pigr.new, beta=beta.new, b.bin=b.bin.new, b.pos=b.pos.new, pi=pi.vec.new, 
              sigma=sigma.new, post=post, info.criteria=crit, time=c.time))
}