rm(list=ls())
library("MASS")
library(QREM)
# https://github.com/haimbar/QREM
library("SEMMS")
# https://github.com/haimbar/SEMMS

library("rqPen") # to compare with a Lasso-based method

# Mixed Effect Modeling and Variable Selection for Quantile Regression
# Bar, Booth, and Wells (Statistical Modelling 2021)

# Code to create Table 2 (variable selection)

simulateAR1 <- function(P, N, corcoef=0.9) {
  Sigma <- toeplitz(1:P)
  for (i in 2:P) {
    Sigma[which(Sigma == i)] <- corcoef^(i-1)
  }
  mvrnorm(N, rep(0,P), Sigma)
}

confmat <- function(allidx, actual, estimated) {
  TP <- which(estimated  %in% actual)
  FP <- which(estimated %in% setdiff(allidx,actual))
  TN <- which(setdiff(allidx,estimated) %in% setdiff(allidx,actual))
  FN <- which(setdiff(allidx,estimated) %in% actual)
  c(length(TP), length(FP),length(TN), length(FN))
}

nn <- 5
res <- matrix(0,ncol=11,nrow=100*9)
cnt <- 1
simno = 9
qns = seq(0.1,0.9,by=0.1)
for (i in 1:100) {
  M0 = simulateAR1(980,100,0)
  M = simulateAR1(20,100, corcoef=0.95)
  truepreds = 1:20
  X = cbind(M,M0)
  y = rowSums(X[,1:20]+rnorm(100,0,0.1))
  dframe <- as.data.frame(cbind(y,X))
  for (qn in qns) {
    L1fit <- rq.lasso.fit(X,y,lambda=0.1,tau = qn)
    selectedL1 <- which(L1fit$coefficients[-1]!=0)
    QREMres <- QREM_vs(dframe, 1, 2:1001,qn = qn, nn=nn)
    selected <- which(QREMres$fittedSEMMS$gam.out$A[1,-1] > 0)
    res[cnt,]  <- c(simno, i, qn, confmat(1:1000, truepreds, selected),
                    confmat(1:1000, truepreds, selectedL1))
    cat(c(simno, i, qn, confmat(1:1000, truepreds, selected), 
          confmat(1:1000, truepreds, selectedL1)),"\n")
    cnt <- cnt+1
  }
}

# True positives (QREM), actual number of predictors is 20
print(table(res[,4],as.factor(res[,3])))
# True positives (Lasso), actual number of predictors is 20
print(table(res[,8],as.factor(res[,3])))

# False positives (QREM)
print(table(res[,5],as.factor(res[,3])))
# False positives (Lasso)
print(table(res[,9],as.factor(res[,3])))
