# Multivariate statistical modelling in medical research ----------------------
# Authors: Wagner H. Bonat/UFPR and Ricardo R. Petterle/UFPR ------------------
# Date: 19/03/2020 ------------------------------------------------------------

# Installing the mcglm package ------------------------------------------------ 
# library(devtools)
# install_github("wbonat/mcglm")
# For details, see: https://github.com/wbonat/mcglm

# BEGIN HEADER ----------------------------------------------------------------
rm(list=ls())

# Loading extra packages 
require(mcglm)
require(Matrix)
library(ggplot2)
require(doBy)

# Loading extra functions 
source("eyelid_functions.r")

# Loading data set 
data <- read.table("eyelid_data_set.csv", h = TRUE, sep = ",")

# Preparing data set
levels(data$technique) <- c("Blepharoplasty", "Endoscopic", 
                          "Endoscopic + Blepharoplasty")
data$time <- as.factor(data$time)
levels(data$time) <- c("Preoperative", "30 days",
                       "90 days", "10 Years")
data$time2 <- rep(c(0,3,12,120), 30)
data$ID2 <- as.factor(data$id)
data$INDEX <- 1:dim(data)[1]
# END HEADER ------------------------------------------------------------------

# Figure2 - Exploratory data analysis -----------------------------------------
data_plot <- data.frame("Y" = c(data$middle_R, data$middle_L, 
                                data$lateral_R, data$lateral_L,
                                data$medial_R, data$medial_L),
                        "Response" = c(rep("Middle pupil", 240), 
                                       rep("Lateral canthus", 240), 
                                       rep("Medial canthus", 240)),
                        "Eye" = c(rep(c(rep("Right", 120), rep("Left", 120)), 3)),
                        "Time" = rep(data$time, 3),
                        "Technique" = rep(data$technique, 3),
                        "Id" = rep(data$ID2, 3))
levels(data_plot$Technique) <- c("Blepharoplasty", "Endoscopic", 
                                 "Endoscopic + Blepharoplasty")
levels(data_plot$Time) <- c("Preoperative", "30 days",
                            "90 days", "10 Years")
# Preparing data
data_plot$Response <- factor(data_plot$Response, 
                             levels = levels(data_plot$Response)[c(1,3,2)])
data_plot$Eye <- factor(data_plot$Eye, 
                             levels = levels(data_plot$Eye)[c(2,1)])
# Figure
ggplot(data_plot, aes(x = Time, y = Y, fill = Eye)) +
  geom_boxplot() +
  facet_grid(Response ~ Technique) + 
  xlab("")+ 
  ylab("Measurements (mm)") + 
  scale_fill_grey(start=0.9, end=0.6) + 
  theme(strip.text.x = element_text(size = 11),
        strip.text.y = element_text(size = 11),
        legend.position="top")
# Save: 9.5 x 7.3

# Covariance structures -------------------------------------------------------

# Independence
Z0 <- mc_id(data)

# Heterogeneity of variances
data$id <- 1
Z1 <- mc_dglm(~ time, id = "id", data = data)

# Exchangeable
Z2 <- mc_mixed(~ 0 + as.factor(ID2), data = data)

# Euclidean distance
Z3 <- mc_dist(id = "ID2", time = "time2", data = data)

# Removing NA's ---------------------------------------------------------------
NA.id <- data[is.na(data$middle_R) | is.na(data$middle_L) | 
                is.na(data$lateral_R) | is.na(data$lateral_L) | 
                is.na(data$medial_R) | is.na(data$medial_L),]$INDEX
data <- data[-NA.id,]

Z0 <- mc_remove_na(Z0, cod = NA.id)
Z1 <- mc_remove_na(Z1, cod = NA.id)
Z2 <- mc_remove_na(Z2, cod = NA.id)
Z3 <- mc_remove_na(Z3, cod = NA.id)

# Linear predictors -----------------------------------------------------------
# Eye: Right
form_latR <- lateral_R ~ technique*time
form_midR <- middle_R ~ technique*time
form_medR <- medial_R ~ technique*time

# Eye: Left
form_latL <- lateral_L ~ technique*time
form_midL <- middle_L ~ technique*time
form_medL <- medial_L ~ technique*time

# Univariate models -----------------------------------------------------------
# Response variable: Lateral (Right) 
# Model: Independent 
latR_ind <- mcglm(linear_pred = c(form_latR),
                  matrix_pred = list(c(Z0)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Variance heterogeneity
latR_vh <- mcglm(linear_pred = c(form_latR),
                 matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
latR_dist <- mcglm(linear_pred = c(form_latR),
                   matrix_pred = list(c(Z0, Z3)),
                   control_algorithm = list(tuning = 0.8, correct = FALSE),
                   data = data)

# Model: Exchangeable
latR_exc <- mcglm(linear_pred = c(form_latR),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Response variable: Middle (Right) -------------------------------------------
# Model: Independent
midR_ind <- mcglm(linear_pred = c(form_midR),
                 matrix_pred = list(c(Z0)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Variance heterogeneity
midR_vh <- mcglm(linear_pred = c(form_midR),
                  matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Euclidean distance
midR_dist <- mcglm(linear_pred = c(form_midR),
                  matrix_pred = list(c(Z0, Z3)),
                  control_algorithm = list(tuning = 0.8, correct = FALSE),
                  data = data)

# Model: Exchangeable
midR_exc <- mcglm(linear_pred = c(form_midR),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Response variable: Medial (Right) -------------------------------------------
# Model: Independent
medR_ind <- mcglm(linear_pred = c(form_medR),
                 matrix_pred = list(c(Z0)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Variance heterogeneity
medR_vh <- mcglm(linear_pred = c(form_medR),
                 matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
medR_dist <- mcglm(linear_pred = c(form_medR),
                   matrix_pred = list(c(Z0, Z3)),
                   control_algorithm = list(tuning = 0.8, correct = FALSE),
                   data = data)

# Model: Exchangeable
medR_exc <- mcglm(linear_pred = c(form_medR),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Response variable: Lateral (Left) -------------------------------------------
# Model: independent 
latL_ind <- mcglm(linear_pred = c(form_latL),
                  matrix_pred = list(c(Z0)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Variance heterogeneity
latL_vh <- mcglm(linear_pred = c(form_latL),
                 matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
latL_dist <- mcglm(linear_pred = c(form_latL),
                   matrix_pred = list(c(Z0, Z3)),
                   control_algorithm = list(tuning = 0.8, correct = FALSE),
                   data = data)

# Model: Exchangeable
latL_exc <- mcglm(linear_pred = c(form_latL),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Response variable: Middle (Left) -------------------------------------
# Model: Independent
midL_ind <- mcglm(linear_pred = c(form_midL),
                  matrix_pred = list(c(Z0)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Variance heterogeneity
midL_vh <- mcglm(linear_pred = c(form_midL),
                 matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
midL_dist <- mcglm(linear_pred = c(form_midL),
                   matrix_pred = list(c(Z0, Z3)),
                   control_algorithm = list(correct = FALSE),
                   data = data)

# Model: Exchangeable
midL_exc <- mcglm(linear_pred = c(form_midL),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Response variable: Medial (Left) --------------------------------------------
# Model: Independent
medL_ind <- mcglm(linear_pred = c(form_medL),
                  matrix_pred = list(c(Z0)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Variance heterogeneity
medL_vh <- mcglm(linear_pred = c(form_medL),
                 matrix_pred = list(c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
medL_dist <- mcglm(linear_pred = c(form_medL),
                   matrix_pred = list(c(Z0, Z3)),
                   control_algorithm = list(tuning = 0.8, correct = FALSE),
                   data = data)

# Model: Exchangeable
medL_exc <- mcglm(linear_pred = c(form_medL),
                  matrix_pred = list(c(Z0, Z2)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Multivariate models ---------------------------------------------------------
# Model: Independent
mult_ind <- mcglm(linear_pred = c(form_latR, form_midR, form_medR, 
                                  form_latL, form_midL, form_medL),
                  matrix_pred = list(c(Z0),c(Z0),c(Z0),c(Z0),c(Z0),c(Z0)),
                  control_algorithm = list(correct = FALSE),
                  data = data)

# Model: Variance heterogeneity
mult_vh <- mcglm(linear_pred = c(form_latR, form_midR, form_medR, 
                                 form_latL, form_midL, form_medL),
                 matrix_pred = list(c(Z1),c(Z1),c(Z1),c(Z1),c(Z1),c(Z1)),
                 control_algorithm = list(correct = FALSE),
                 data = data)

# Model: Euclidean distance
mult_dist <- mcglm(linear_pred = c(form_latR, form_midR, form_medR, 
                                   form_latL, form_midL, form_medL),
                   matrix_pred = list(c(Z0,Z3),c(Z0,Z3),c(Z0,Z3),
                                      c(Z0,Z3),c(Z0,Z3),c(Z0,Z3)),
                   data = data, 
                   control_algorithm = list(max_iter = 100, 
                                            tuning = 0.7,
                                            verbose = T,
                                            correct = F))

# Model: Exchageable
mult_exc <- mcglm(linear_pred = c(form_latR, form_midR, form_medR, 
                                  form_latL, form_midL, form_medL),
                  matrix_pred = list(c(Z0,Z2),c(Z0,Z2),c(Z0,Z2),
                                     c(Z0,Z2),c(Z0,Z2),c(Z0,Z2)),
                  data = data, 
                  control_algorithm = list(max_iter = 65, 
                                           correct = FALSE,
                                           tuning = 0.8))

# Comparing models ------------------------------------------------------------
# Univariate
tab_uni <- rbind(gof(list(latR_ind, midR_ind, medR_ind, latL_ind, midL_ind, medR_ind)),
                 gof(list(latR_vh, midR_vh, medR_vh, latL_vh, midL_vh, medL_vh)),
                 gof(list(latR_dist, midR_dist, medR_dist, latL_dist, midL_dist, medL_dist)),
                 gof(list(latR_exc, midR_exc, medR_exc, latL_exc, midL_exc, medL_exc)))[,-4]
# Multivariate
tab_mult <- rbind(gof(mult_ind), gof(mult_vh), gof(mult_dist), gof(mult_exc))[,-4]
rownames(tab_uni) <- c("Independent", "Variance het.", "Euclidean dist.", "Exchangeable")
rownames(tab_mult) <- c("Independent", "Variance het.", "Euclidean dist.", "Exchangeable")

# Table 1 - Measures of goodness-of-fit ---------------------------------------
cbind(tab_uni, tab_mult) 

# Table 2 - Multivariate analysis of variance ---------------------------------
mc_manova(mult_exc)

# Table 3 - ANOVA Wald test ---------------------------------------------------
anova(mult_exc) 

# Table 4 - Dispersion parameters ---------------------------------------------
tau = coef(mult_exc, type = "tau")$Estimates
COV = 2*(-as.matrix(mult_exc$joint_inv_sensitivity))
COV = COV[88:99, 88:99]
std_tau = sqrt(diag(-2*mult_exc$joint_inv_sensitivity[88:99, 88:99]))
z_tau = tau/std_tau
tab <- cbind("Estimates" = tau, "SE" = std_tau, "Z" = z_tau) 
tab_R <- cbind(tab[1:2,], tab[3:4,], tab[5:6,])
tab_L <- cbind(tab[7:8,], tab[9:10,], tab[11:12,])
tab4 <- rbind(tab_R, tab_L)
tab4

# Table 5 - Intraclass correlation --------------------------------------------
# Lateral canthus - Right 
int_lat_R = tau[2]/(tau[1] + tau[2])
std_int_lat_R = deltamethod(~ x2/(x1 + x2), mean = tau[c(1,2)], 
                                cov = COV[c(1,2), c(1,2)])
# Middle pupil - Right 
int_middle_R = tau[4]/(tau[3] + tau[4])
std_int_middle_R = deltamethod(~ x2/(x1 + x2), mean = tau[c(3,4)], 
                                   cov = COV[c(3,4), c(3,4)])
# Medial canthus - Right 
int_medial_R = tau[6]/(tau[5] + tau[6])
std_int_medial_R = deltamethod(~ x2/(x1 + x2), mean = tau[c(5,6)], 
                                   cov = COV[c(5,6), c(5,6)])
# Lateral canthus - Left
int_lat_L = tau[8]/(tau[7] + tau[8])
std_int_lat_L = deltamethod(~ x2/(x1 + x2), mean = tau[c(7,8)], 
                            cov = COV[c(7,8), c(7,8)])
# Middle pupil - Left 
int_middle_L = tau[10]/(tau[9] + tau[10])
std_int_middle_L = deltamethod(~ x2/(x1 + x2), mean = tau[c(9,10)], 
                               cov = COV[c(9,10), c(9,10)])
# Medial canthus - Left 
int_medial_L = tau[12]/(tau[11] + tau[12])
std_int_medial_L = deltamethod(~ x2/(x1 + x2), mean = tau[c(11,12)], 
                               cov = COV[c(11,12), c(11,12)])
# Right -----------------------------------------------------------------------
Lateral_R = data.frame("Estimates" = int_lat_R, "Std.Error" = std_int_lat_R, 
                     "Z-value" = int_lat_R/std_int_lat_R)

Middle_R = data.frame("Estimates" = int_middle_R, "Std.Error" = std_int_middle_R, 
                    "Z-value" = int_middle_R/std_int_middle_R)

Medial_R = data.frame("Estimates" = int_medial_R, "Std.Error" = std_int_medial_R, 
                    "Z-value" = int_medial_R/std_int_medial_R)
# Left ------------------------------------------------------------------------
Lateral_L = data.frame("Estimates" = int_lat_L, "Std.Error" = std_int_lat_L, 
                       "Z-value" = int_lat_L/std_int_lat_L)

Middle_L = data.frame("Estimates" = int_middle_L, "Std.Error" = std_int_middle_L, 
                      "Z-value" = int_middle_L/std_int_middle_L)

Medial_L = data.frame("Estimates" = int_medial_L, "Std.Error" = std_int_medial_L, 
                      "Z-value" = int_medial_L/std_int_medial_L)
# Preparing table 
tab_R <- cbind(Lateral_R, Middle_R, Medial_R)
tab_L <- cbind(Lateral_L, Middle_L, Medial_L) 
tab5 <- rbind(tab_R, tab_L)
rownames(tab5) <- c("Right", "Left")
tab5

# Correlation matrix ----------------------------------------------------------
rho = coef(mult_exc, type = "correlation")$Estimates
std_rho = coef(mult_exc, type = "correlation", std.error = TRUE)$Std.error
z_rho = rho/std_rho
round(cbind(rho, std_rho), dig = 2)

## Overall responses ----------------------------------------------------------
lat_R.lm <- lm(lateral_R ~ technique * time, data = data)
lsm <- LSmeans(lat_R.lm, effect = c("time", "technique"))
L <- by(lsm$L, INDICES = lsm$grid$technique, FUN = as.matrix)
L <- lapply(L, "rownames<-", levels(data$time))
K <- lapply(L, apc)
K[[1]][1,]

# Table 6 Contrasts --------------------------------------------------
G <- Diagonal(6, 1) # Diagonal matrix resp x resp

# Right
K_list1 <- list()
for(i in 1:dim(K[[1]])[1]) {
  K_list1[[i]] <- kronecker(G, t(K[[1]][i,]))
}
MANOVA_Blepharoplasty = mc_manova(mult_exc, CC = K_list1, names = rownames(K[[1]]))

K_list2 <- list()
for(i in 1:dim(K[[2]])[1]) {
  K_list2[[i]] <- kronecker(G, t(K[[2]][i,]))
}
MANOVA_Endoscopic = mc_manova(mult_exc, CC = K_list2, names = rownames(K[[2]]))

K_list3 <- list()
for(i in 1:dim(K[[3]])[1]) {
  K_list3[[i]] <- kronecker(G, t(K[[3]][i,]))
}
MANOVA_End_Ble = mc_manova(mult_exc, CC = K_list3, names = rownames(K[[3]]))

tab6 <- rbind(MANOVA_Blepharoplasty, MANOVA_Endoscopic, MANOVA_End_Ble)
tab6

# Table 7 and table 8 - Interaction effects -----------------------------------
lat_R.lm <- lm(lateral_R ~ technique * time , data = data)
lsm <- LSmeans(lat_R.lm, effect = c("time", "technique"))
L <- by(lsm$L, INDICES = lsm$grid$technique, FUN = as.matrix)
L <- lapply(L, "rownames<-", levels(data$time))
K <- lapply(L, apc)

# Lateral
lat_R_tec_time = lapply(K,
                      FUN = function(k) {
                        betak <- k %*% coef(mult_exc, type = "beta", response = 1)$Estimates
                        vcovk <- k %*% vcov(mult_exc)[1:12,1:12] %*% t(k)
                        ep <- sqrt(diag(vcovk))
                        z <- betak/ep
                        p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                        p <- p.adjust(p, method = "bonferroni")
                        round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                      })
# Middle
mid_R_tec_time = lapply(K,
                      FUN = function(k) {
                        betak <- k %*% coef(mult_exc, type = "beta", response = 2)$Estimates
                        vcovk <- k %*% vcov(mult_exc)[13:24,13:24] %*% t(k)
                        ep <- sqrt(diag(vcovk))
                        z <- betak/ep
                        p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                        p <- p.adjust(p, method = "bonferroni")
                        round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                      })
# Medial
med_R_tec_time = lapply(K,
                      FUN = function(k) {
                        betak <- k %*% coef(mult_exc, type = "beta", response = 3)$Estimates
                        vcovk <- k %*% vcov(mult_exc)[25:36,25:36] %*% t(k)
                        ep <- sqrt(diag(vcovk))
                        z <- betak/ep
                        p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                        p <- p.adjust(p, method = "bonferroni")
                        round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                      })
# Table 7
lat_R_tec_time
mid_R_tec_time
med_R_tec_time

# Left ------------------------------------------ 
# Lateral
lat_L_tec_time = lapply(K,
                        FUN = function(k) {
                          betak <- k %*% coef(mult_exc, type = "beta", response = 4)$Estimates
                          vcovk <- k %*% vcov(mult_exc)[37:48,37:48] %*% t(k)
                          ep <- sqrt(diag(vcovk))
                          z <- betak/ep
                          p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                          p <- p.adjust(p, method = "bonferroni")
                          round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                        })
# Middle
mid_L_tec_time = lapply(K,
                        FUN = function(k) {
                          betak <- k %*% coef(mult_exc, type = "beta", response = 5)$Estimates
                          vcovk <- k %*% vcov(mult_exc)[49:60,49:60] %*% t(k)
                          ep <- sqrt(diag(vcovk))
                          z <- betak/ep
                          p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                          p <- p.adjust(p, method = "bonferroni")
                          round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                        })

# Medial
med_L_tec_time = lapply(K,
                        FUN = function(k) {
                          betak <- k %*% coef(mult_exc, type = "beta", response = 6)$Estimates
                          vcovk <- k %*% vcov(mult_exc)[61:72,61:72] %*% t(k)
                          ep <- sqrt(diag(vcovk))
                          z <- betak/ep
                          p <- 2 * pnorm(abs(z), lower.tail = FALSE)
                          p <- p.adjust(p, method = "bonferroni")
                          round(data.frame(Est = betak, EP = ep, z = z, pval = p),2)
                        })
# Table 8
lat_L_tec_time
mid_L_tec_time
med_L_tec_time
# END -------------------------------------------------------------------------