# Mixed Effect Modeling and Variable Selection for Quantile Regression
# Bar, Booth, and Wells (Statistical Modelling 2021)

# Code to create Figures 5 and 6

rm(list=ls())
library("QREM")
zalpha <- qnorm(0.025)
load("ERdat2006.RData")
# pre-process the data and create the variables and the data frame
# show some diagnostics plots and tables
#
y <- log(ERdat$LOV+1,base=60)
hist(y,breaks=20)
Month <- as.factor(ERdat$month)
barplot(table(Month))
DOW <- as.factor(ERdat$dow)
barplot(table(DOW))
Sex <- as.factor(ERdat$sex)
levels(Sex)  <- c("F","M")
pie(table(Sex))
plot(y~Month)
plot(y~DOW)
Race <- as.factor(ERdat$race)
barplot(table(Race))
Race2 <- Race
Race2[which(as.numeric(as.character(Race)) %in% c(-9,3,4,5,6))] <- 3
Race2 <- factor(Race2)
levels(Race2) = c("W","B","O")
barplot(table(Race2))
Age <- ERdat$age/100
hist(Age)
PayType <- as.factor(ERdat$paytype)
# 0 = Blank, 1 = Private insurance, 2 = Medicare, 3 = Medicaid/SCHIP
# 4 = Worker's Compensation, 5 = Self-pay, 6 = No charge/charity
# 7 = Other, 8 = Unknown
barplot(table(PayType))
# combine 0,7,8, 
PayType2 <- PayType
PayType2[which(as.numeric(as.character(PayType)) %in% c(2,3,4))] <- 2
PayType2[which(as.numeric(as.character(PayType)) %in% c(5))] <- 3
PayType2[which(as.numeric(as.character(PayType)) %in% c(-9,-8,0,6,7,8))] <- 4
PayType2 <- factor(PayType2)
levels(PayType2) = c("Private","GovEmp","Self","Other")
barplot(table(PayType2))

Temp <- ERdat$temp/10
hist(Temp,breaks=20)
Pulse <- ERdat$pulse
hist(Pulse,breaks=20)
SBP <- ERdat$sbp
hist(SBP,breaks=20)
DBP <- ERdat$dbp
hist(DBP,breaks=20)

Pain <- as.factor(ERdat$pain)
barplot(table(Pain))

Residence <- as.factor(ERdat$resid)
barplot(table(Residence))
ArrivalMode <- as.factor(ERdat$arrmode)
barplot(table(ArrivalMode))
ArrivalTime <- as.factor(floor(ERdat$arrtime/100))
ArrivalTime2 <- floor(ERdat$arrtime/100)
ArrivalTime2[which(ArrivalTime2 >8 & ArrivalTime2 < 20)] <- "AM"
ArrivalTime2[which(ArrivalTime2 != "AM")] <- "PM"
barplot(table(ArrivalTime))
ArrivalTime2 = as.factor(ArrivalTime2)
barplot(table(ArrivalTime2))

Region <- as.factor(ERdat$region)
levels(Region) <- c("NE","MW","S","W")
barplot(table(Region))
Metro <- as.factor(ERdat$metro)
levels(Metro) <- c("Yes", "No")
barplot(table(Metro))
Owner <- as.factor(ERdat$owner)
barplot(table(Owner))
HospCode <- ERdat$hosp 
hist(HospCode, breaks=30)

RecentVisit <- rep("N",length(HospCode))
if(length(ERdat$disch7 ) == 0) { RecentVisit[which(ERdat$seen72 == 1)] <- "Y"
} else {  RecentVisit[which(ERdat$seen72 == 1 | ERdat$disch7 ==1)] <- "Y" }
RecentVisit <- as.factor(RecentVisit)
table(RecentVisit)


dframe <- data.frame(y,Sex,Month,DOW,Race2,Age,PayType2,Temp,Pulse,
                     SBP,DBP,Pain,Residence,ArrivalMode,ArrivalTime2,
                     Region, Metro, #Owner,
                     as.factor(HospCode), RecentVisit)
colnames(dframe)[18] <- "HospCode"
qs <- c(seq(0.05, 0.95, by=0.05))
# no batch effect
linmod <- y~ Sex+Race2+Age+Region+Metro+
  PayType2+ArrivalTime2+DOW+RecentVisit
ncols <- nlevels(Sex)-1+nlevels(Race2)-1+1+
  nlevels(PayType2)-1+
  nlevels(Region)-1+nlevels(Metro)-1+nlevels(ArrivalTime2)-1+
  nlevels(DOW)-1+nlevels(RecentVisit)-1+1

res1 <- matrix(0,nrow=length(qs), ncol=2*ncols)
for (i in 1:length(qs)) {
  cat(i,qs[i],"\n")
  qremFit <- QREM("lm",linmod, dframe, qs[i])
  varKED <- bcov(qremFit, linmod=linmod, dframe, qs[i])
  res1[i,] <- c(as.numeric(qremFit$fitted.mod$coefficients), sqrt(diag(varKED)))
}


# hospital is a random effect:
linmodrnd <- y~ Sex+Race2+Age+Region+Metro+
  PayType2+ArrivalTime2+DOW+
  RecentVisit+ (1|HospCode)
ncols2 <- nlevels(Sex)-1+nlevels(Race2)-1+1+
  nlevels(PayType2)-1+
  nlevels(Region)-1+nlevels(Metro)-1+nlevels(ArrivalTime2)-1+
  nlevels(DOW)-1+nlevels(RecentVisit)-1+1

# set onlyEstimate = FALSE if you want to get regression coefficient estimates
# without running the bootstrap (which takes a long time)
res2 <- matrix(0,nrow=length(qs), ncol=2*ncols2)
onlyEstimate <- TRUE
if (onlyEstimate) {
  for (i in 1:length(qs)) {
    cat(i,qs[i],"\n")
    qremFit <- QREM(lmer,linmodrnd, dframe, qs[i], maxit = 2000)
    res2[i,] <- c(as.numeric(qremFit$coef$beta), rep(0,ncols2))
  }
} else {
  B <- 99
  for (i in 1:length(qs)) {
    cat(i,qs[i],"\n")
    bsv <-   boot.QREM(lmer, linmodrnd, dframe, qs[i], n=100,#length(unique(HospCode)), 
                        sampleFrom = "HospCode",maxit = 2000, B=B, seedno=336621, showEst = TRUE)
    res2[i,] <- c(colMeans(bsv), apply(bsv,2,sd))
  }
}

# parameter estimates with 95% confidence intervals for each predictor, by quantile
# for the two models (with/without random effect) using smooth splines
# names(fixef(qremFit$fitted.mod))
varnames <- c("(Intercept)", "SexM", "Race2B", "Race2O",  "Age", "RegionMW",   
              "RegionS", "RegionW", "MetroNo", "PayType2GovEmp", "PayType2Self", "PayType2Other" ,
              "ArrivalTime2PM", "DOW2", "DOW3", "DOW4", "DOW5", "DOW6",      
               "DOW7", "RecentVisitY")
ciCols <- c("navyblue","darkred","orange") 
sspldf=10
for (j in 1:(ncol(res1)/2)) {
  mm <- min(res1[,j]-abs(zalpha)*res1[,j+ncol(res1)/2], res2[,j]-abs(zalpha)*res2[,j+ncol(res2)/2])
  mm <- mm - abs(mm)*0.1
  MM <- max(res1[,j]+abs(zalpha)*res1[,j+ncol(res1)/2], res2[,j]+abs(zalpha)*res2[,j+ncol(res2)/2])
  MM <- MM + abs(MM)*0.1
  plot(smooth.spline(qs,res1[,j],df=sspldf),type='l', axes=F, ylim=c(mm,MM),
       main=varnames[j], ylab="Coef.", xlab="quantile",col=ciCols[1],lwd=2)
  axis(1,labels=seq(0,1,by=0.1), at=seq(0,1,by=.1)); axis(2)
  lines(smooth.spline(qs,res2[,j],df=sspldf),col=ciCols[2],lwd=2,lty=2)
  yyl <- c(res1[,j]-abs(zalpha)*res1[,j+ncol(res1)/2])
  yyu <- c(res1[,j]+abs(zalpha)*res1[,j+ncol(res1)/2])
  sspl <- smooth.spline(qs, yyl, df=sspldf)
  sspu <- smooth.spline(qs, yyu, df=sspldf)
  xx <- c(sspl$x, rev(sspu$x))
  yy <- c(sspl$y, rev(sspu$y))
  polygon(xx, yy, col = adjustcolor(ciCols[1], alpha.f=0.1), 
          border = ciCols[1], lty=1)
  
  yyl <- c(res2[,j]-abs(zalpha)*res2[,j+ncol(res2)/2])
  yyu <- c(res2[,j]+abs(zalpha)*res2[,j+ncol(res2)/2])
  sspl <- smooth.spline(qs, yyl, df=sspldf)
  sspu <- smooth.spline(qs, yyu, df=sspldf)
  xx <- c(sspl$x, rev(sspu$x))
  yy <- c(sspl$y, rev(sspu$y))
  polygon(xx, yy, col = adjustcolor(ciCols[2], alpha.f=0.1), 
          border = ciCols[2], lty=1)
  
  abline(h=0,lwd=2,col="grey66")
}
