

dati<-read.table("data_CaseStudy1.txt",header=TRUE)

source('script_mihg_all.R')


library(CUB)


m<-8



# figure sportfr1, sportfr2

sport<-dati[,11:18]

m<-8

par(mfrow=c(2,2))
par(mar=c(3.1,2.1,2,1))

nomi<-c("Football", "Swimming","Volleyball","Cycling","Basketball","Boxing","Tennis","Jogging")
for (j in 1:8){
  ordinal<-sport[,j]
  fr<-tabulate(ordinal,nbins=m)/length(ordinal)
  plot(1:m,fr,type="h",lwd=2,xlab="",ylab="",cex.axis=1.5,ylim=c(0,0.33),main=nomi[j],cex.main=2.3)
  
}

par(mfrow=c(1,1))
par(mar=c(5, 4, 4, 2) + 0.1)


################ football preferences

ordinal<-dati$calcio
m<-8

estsoccer<-mihgest(ordinal,m,maxiter=1000,toler=1e-6)
estsoccer
# $finalest
# [1] 0.61049770 0.05346659 0.69047240
# 
# $varmat
# [,1]         [,2]        [,3]
# [1,] 0.0012800163 1.236394e-04 0.001687435
# [2,] 0.0001236394 2.970818e-05 0.000223427
# [3,] 0.0016874355 2.234270e-04 0.004319856
# 
# $niter
# [1] 60
# 
# $loglikfin
# [1] -1201.298



sum(dati[,1])/length(dati[,1])
#[1] 0.5873261  # % DI DONNE CIRCA UGUALE AD 1-PAI---> CLASSI LATENTI

-2*(estsoccer$loglikfin) +3*log(length(ordinal))
#[1]  2422.013


paiestsocc<-estsoccer$finalest[1];
theta1estsocc<-estsoccer$finalest[2];
theta2estsocc<-estsoccer$finalest[3];

ordinal<-dati[,11]
freqrel<-tabulate(ordinal,nbins=m)/length(ordinal)
pr<-probmihg(m, paiestsocc, theta1estsocc, theta2estsocc)
disssoccer<-round(dissim(freqrel,pr),3)


###### figura football

plot(1:m,freqrel,type="h",xlab="",ylab="",main=paste("Football (Diss=",disssoccer,")"))
#prcub<-probcubshe2(m,0.343,0.98,0.183,8)
lines(1:m,pr,lty=2,lwd=2,col="blue")
#lines(1:m,prcub,lty=2,lwd=2,col="red")
legend("top",bty="n",legend=c("Relative Freq.","Mixture of IHG"),col=c("black","blue"),lty=c(1,2))




################################### with covariate
cov<-dati$genere


fit<-mihgfit(ordinal,Y=cov,U2=cov,m=8,maxiter=500,toler=1e-6)
# $finalest
# [1] -0.50370002  1.11787413  0.04476102 -1.42058750  2.61863292
# 
# $varmat
# [,1]          [,2]          [,3]         [,4]          [,5]
# [1,]  0.0244778178 -0.0142414915  2.871435e-04 -0.020766264  0.0099660364
# [2,] -0.0142414915  0.0882250922  9.949068e-04 -0.003465591 -0.0691764740
# [3,]  0.0002871435  0.0009949068  3.596334e-05 -0.000679738 -0.0006729382
# [4,] -0.0207662636 -0.0034655905 -6.797380e-04  0.106215812 -0.0806490685
# [5,]  0.0099660364 -0.0691764740 -6.729382e-04 -0.080649068  0.2053924204
# 
# $niter
# [1] 105
# 
# $loglikfin
# [1] -1138.937

fit$finalest/sqrt(diag(fit$varmat))
# [1] [1] -3.219479  3.763543  7.463971 -4.358865  5.778065



param<-fit$finalest
betas<-param[1:2]; theta1<-param[3]; omegas2<-param[4:5]
pai0<-logis(0,betas)
pai1<-logis(1,betas)



theta20<-1- (m-1)*logis(0,omegas2)/m
theta21<-1-(m-1)*logis(1,omegas2)/m


pr0<-probmihg(m,pai0,theta1,theta20)
pr1<-probmihg(m,pai1,theta1,theta21)


# figure  mihg_footballgender

fr<-tabulate(ordinal,nbins=m)/length(ordinal)
plot(1:m,fr,type="h",xlab="",ylab="",ylim=c(0,0.41),cex.axis=1.5,lwd=2,cex.main=2.3,main="Football: MIHG given Gender")
lines(1:m,pr0,lwd=2,col="blue")
lines(1:m,pr1,lwd=2,col="magenta")
legend("top",horiz=TRUE,bty="n",text.col=c("blue","magenta"),legend=c("Men","Women"),cex=1.5)


############## application of delta method: 



varmatbeta<-fit$varmat[1:2,1:2]

beta0<-fit$finalest[1]; beta1<-fit$finalest[2]

sepai(beta0,beta1,varmatbeta)



varmatheta2<-fit$varmat[4:5,4:5]

omega0<-fit$finalest[4]; omega1<-fit$finalest[5]

setheta2(omega0,omega1,varmatheta2,m)


################################???
### GECUB

library(CUB)

gecubfoot<-GEM(Formula(ordinal~cov|cov|cov),family="cub",shelter=8)


### figura footballshe


param<-coef(gecubfoot)
betas<-param[1:2];gamas<-param[3:4]; omegas<-param[5:6]

pai0<-logis(0,betas); pai1<-logis(1,betas);
csi0<-logis(0,gamas); csi1<-logis(1,gamas);
delta0<-logis(0,omegas); delta1<-logis(1,omegas);

prshe0<-probcubshe2(m,pai0,csi0,delta0,shelter=8)
prshe1<-probcubshe2(m,pai1,csi1,delta1,shelter=8)

# figura ihg_football3
fr<-tabulate(ordinal,nbins=m)/length(ordinal)
plot(1:m,fr,type="h",xlab="",ylab="",ylim=c(0,0.5),cex.axis=1.5,lwd=2,cex.main=2.3,main="Football: CUB with shelter at c=8")
lines(1:m,prshe0,lwd=2,col="blue")
lines(1:m,prshe1,lwd=2,col="magenta")
legend("topleft",horiz=TRUE,bty="n",horiz,text.col=c("blue","magenta"),legend=c("Men","Women"),cex=1.5)


###################### delta method



varmat<-gecubfoot$varmat
varmatbeta<-varmat[1:2,1:2]
varmatgama<-varmat[3:4,3:4]
varmatdelta<-varmat[5:6,5:6]

betas<-coef(gecubfoot)[1:2]
gamas<-coef(gecubfoot)[3:4]
deltas<-coef(gecubfoot)[5:6]


beta0<-betas[1]; beta1<-betas[2]

sepai(beta0,beta1,varmatbeta)

gama0<-gamas[1]; gama1<-gamas[2]

sepai(gama0,gama1,varmatgama)


delta0<-deltas[1]; delta1<-deltas[2]

sepai(delta0,delta1,varmatdelta)



#################### CAUB:

source("script_CAUB.R")

fitcaub<-clubcov2(ordinal,m,W=cov,Z=cov,cc=0,maxiter=500,toler=1e-6)

# $finalest
# [1]  0.2364107  3.8592151 -7.9663581 -1.5431689  1.2152046
# 
# $clubest
# [1]  0.08788543  0.73438896 -1.97039422  1.16676311
# 
# $varmat
# [,1]         [,2]         [,3]         [,4]          [,5]
# [1,]  0.0010162420 -0.001576939  0.008617286  0.001191311 -0.0003392093
# [2,] -0.0015769389  0.726528651 -0.737453408  0.067979994 -0.0693022301
# [3,]  0.0086172861 -0.737453408  1.286564533 -0.059726801  0.0453322320
# [4,]  0.0011913108  0.067979994 -0.059726801  0.029848080 -0.0288491865
# [5,] -0.0003392093 -0.069302230  0.045332232 -0.028849186  0.0389549878
# 
# $niter
# [1] 81
# 
# $loglikfin
# [1] -1145.779
# 
# $wald
# [1]  7.415980  4.527648 -7.023346 -8.932134  6.156982


param<-fitcaub$finalest

pai0<-pai1<-param[1]; 
gamas<-param[2:3]; csi0<-logis(0,gamas); csi1<-logis(1,gamas)

nus<-param[4:5]
alfa0<-exp(nus[1]); alfa1<-exp(sum(nus))


prcaub0<-probclub2c(m,pai0,csi0,alfa0,alfa0,cc=0)
prcaub1<-probclub2c(m,pai1,csi1,alfa1,alfa1,cc=0)


# ### figura caubgender_football

fr<-tabulate(ordinal,nbins=m)/length(ordinal)
plot(1:m,fr,type="h",xlab="",ylab="",ylim=c(0,0.5),cex.axis=1.5,lwd=2,cex.main=2.3,main="Football: CAUB given Gender")
lines(1:m,prcaub0,lwd=2,col="blue")
lines(1:m,prcaub1,lwd=2,col="magenta")
legend("topleft",horiz=TRUE,bty="n",horiz,text.col=c("blue","magenta"),legend=c("Men","Women"),cex=1.5)




##################################################
############### discretized Beta

source("script_DisBeta.R")

freqf<-tabulate(ordinal,m)
stimaf<-optim(par=c(1,1),fn=lldbeta,freq=freqf,method="L-BFGS-B")
stimaf

# $par
# [1] 0.3285234 0.3816354
# 
# $value
# [1] 1195.704
# 
# $counts
# function gradient 
# 13       13 
# 
# $convergence
# [1] 0
# 
# $message
# [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"


2*stimaf$value  + 2*log(647)

2*stimaf$value  + 4

pr<-disbetaprob(m,stimaf$par[1],stimaf$par[2])

### figura db2_football
pr<-disbetaprob(m,stimaf$par[1],stimaf$par[2])
plot(table(dati[,11])/647,ylim=c(0,0.4),ylab="",xlab="",main="Discretized Beta: Football Preference",cex.main=2)
lines(1:m,pr,lwd=2)
alfas<-round(stimaf$par[1],2); betas<-round(stimaf$par[2],2)
text(1.4,0.4,labels=bquote(alpha==.(alfas)),cex=1.8)
text(3,0.4,labels=bquote(beta==.(betas)),cex=1.8)



dissim(freqf/647,pr)

############## senza covariate, alfa = beta

m<-8
freqf<-tabulate(ordinal,m)
alfak<-1
stimaf<-optim(par=alfak,fn=lldbeta2,freq=freqf,method="Brent",lower=0,upper=10)
stimaf

# $par
# [1] 0.3511589
# 
# $value
# [1] 1198.775

pr<-disbeta2(m,stimaf$par)
dissim(pr,freqf/647)

########## db1_football
pr<-disbeta2(m,stimaf$par)
plot(table(dati[,11])/647,ylim=c(0,0.4),ylab="",xlab="",main="Discretized Beta: Football Preference",cex.main=2)
lines(1:m,pr,lwd=2)
alfas<-round(stimaf$par[1],2); betas<-round(stimaf$par[2],2)
text(1.4,0.4,labels=bquote(alpha==.(alfas)),cex=1.8)


## with covariate
Z<-dati$genere
nuk<-c(0,0)
stima<-optim(par=nuk,fn=lldbcov,ordinal=ordinal,Z=Z,method="L-BFGS-B",hessian=TRUE)

stima
# $par
# [1] -1.6546860  0.9729802
# 
# $value
# [1] 1176.516
# 
# $counts
# function gradient 
# 10       10 
# 
# $convergence
# [1] 0
# 
# $message
# [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

n<-length(ordinal)
BIC<-2*stima$value +2*log(n)
BIC



############## alfa diverso da beta

Z<-dati$genere
param<-c(0,0,0,0)
stima<-optim(par=param,fn=lldbcov2,ordinal=ordinal,Z1=Z,Z2=Z,method="L-BFGS-B",hessian = TRUE)

stima

# $par
# [1] -1.9043381  1.3947047 -1.0900641  0.2856868
# 
# $value
# [1] 1139.591
# 
# $counts
# function gradient 
# 16       16 
# 
# $convergence
# [1] 0

BIC<-2*stima$value + length(stima$par)*log(n)
BIC


varmat<-solve(stima$hessian)

wald<-stima$par/sqrt(diag(varmat))
wald







############################################################################################
############################################################################################
############# jogging

ordinal<-dati$Jogging

## ihg with covariate stress
stress01<-dati$stress/100
ihg_jogging<-GEM(Formula(ordinal~stress01),data=dati,family="ihg")
summary(ihg_jogging)

# ======================================================================= 
#   =====>>> IHG  model    <<<=====   ML-estimates via E-M algorithm   
# ======================================================================= 
#   m= 8  Sample size: n= 647  Iterations= 1  Maxiter= 1 
# ======================================================================= 
#   Estimates    StdErr       Wald
# constant -2.3919570 0.1425388 -16.781098
# stress01  0.5751518 0.2096061   2.743965
# ======================================================================= 
#   Log-lik            = -1340.087 
# Mean Log-likelihood= -2.071231 
# ----------------------------------------------------------------------- 
#   AIC       = 2684.173 
# BIC       = 2693.118 
# ICOMP     = 2682.429 
# ======================================================================= 
#   Elapsed time= 0.11 seconds =====>>> Thu Jul 05 15:57:46 2018 
# =======================================================================  

thetaval<-function(x){
  val<- sum(coef(ihg_jogging)*c(1,x))
  
  return(1/(1+exp(-val)))
}

xvett<-seq(0,1,by=0.01)
thetavett<-sapply(xvett,thetaval)


## figura thetastress2
plot(xvett,thetavett,type="l",xlab="stress",ylab=expression(theta),cex.lab=1.3,lwd=1.3)
abline(h=1/m,col="red",lwd=1.3)
text(0.1,1/m,label="1/m",pos=1,col="red")
points(0.77,thetaval(0.77),cex=1.5,col="red",pch=19)
abline(v=0.77,col="red",lwd=1.3)



#### mihg

mihgfit<-mihgest(ordinal,m)

# $finalest
# [1] 0.68094135 0.08100554 0.30376044
# 
# $varmat
# [,1]         [,2]        [,3]
# [1,] 0.021785693 0.0019307091 0.012606384
# [2,] 0.001930709 0.0001983913 0.001078253
# [3,] 0.012606384 0.0010782530 0.008626232
# 
# $niter
# [1] 444
# 
# $loglikfin
# [1] -1333.342


BICmihg<- -2*mihgfit$loglikfin +3*log(length(ordinal))




paiestjogg<-mihgfit$finalest[1];
theta1estjogg<-mihgfit$finalest[2];
theta2estjogg<-mihgfit$finalest[3];


### figura jogging

freqrel<-tabulate(ordinal,nbins=m)/length(ordinal)
pr<-probmihg(m, paiestjogg, theta1estjogg, theta2estjogg)
dissimjoggin<-dissim(freqrel,pr)
plot(1:m,freqrel,ylab="",xlab="",main=paste("Jogging (Diss =",round(dissimjoggin,3),")"),type="h")
lines(1:m,pr,lty=2,lwd=2,col="blue")
legend("topleft",,bty="n",legend=c("Relative Freq.","Mixture of IHG"),col=c("black","blue"),lty=c(1,2))
#[1] 0.01770688

mihgfit$finalest/sqrt(diag(mihgfit$varmat))
#4.613614 5.751306 3.270567

sqrt(diag(mihgfit$varmat))




########## with stress covariate

stress<-dati$stress
cov<-ifelse(stress<=75,0,1)
m<-8


########### chosen model

fit<-mihgfit(ordinal,U2=cov,m=8,maxiter=500,toler=1e-4)

# $finalest
# [1]  0.55221804  0.07108407  2.25209309 -0.96192543
# 
# $varmat
# [,1]          [,2]         [,3]          [,4]
# [1,]  0.025619131  2.261313e-03 -0.083923016  8.010038e-03
# [2,]  0.002261313  2.351510e-04 -0.006545628 -5.192032e-05
# [3,] -0.083923016 -6.545628e-03  0.409638532 -1.584652e-01
# [4,]  0.008010038 -5.192032e-05 -0.158465226  2.220808e-01
# 
# $niter
# [1] 264
# 
# $loglikfin
# [1] -1331.365

fit$finalest/sqrt(diag(fit$varmat))
#[1]  3.450074  4.635524  3.518730 -2.041201




param<-fit$finalest


pai<-param[1]; theta1<-param[2];


omegas2<-param[3:4]



theta20<-1- (m-1)*logis(0,omegas2)/m
theta21<-1-(m-1)*logis(1,omegas2)/m



pr0<-probmihg(m,pai,theta1,theta20)
pr1<-probmihg(m,pai,theta1,theta21)


### figure mihg_joggingstress
fr<-tabulate(ordinal,nbins=m)/length(ordinal)
plot(1:m,fr,type="h",xlab="",ylab="",ylim=c(0,0.25),cex.axis=1.5,lwd=2,cex.main=2.3,main="Jogging: MIHG given Stress Level")
lines(1:m,pr0,lwd=2,col="darkgreen")
lines(1:m,pr1,lwd=2,col="red")
legend("topleft",horiz=TRUE,bty="n",text.col=c("darkgreen","red"),legend=c("Low-Medium Stress","High Stress"),cex=1.5)




setheta2<-function(beta0,beta1,varmat,m){
  
  
  const<- -(m-1)/m
  grad0<-const*c(der1beta0(beta0,beta1,0),der1beta1(beta0,beta1,0))
  varpai0<-t(grad0)%*%varmat%*%grad0
  stdpai0<-sqrt(varpai0) 
  
  grad1<-const*c(der1beta0(beta0,beta1,1),der1beta1(beta0,beta1,1))
  
  varpai1<-t(grad1)%*%varmat%*%grad1
  stdpai1<-sqrt(varpai1) # 
  
  
  return(list('stdpai0'=stdpai0,'stdpai1'=stdpai1))
  
}

varmatheta2<-fit$varmat[3:4,3:4]

omega0<-fit$finalest[3]; omega1<-fit$finalest[4]

setheta2(omega0,omega1,varmatheta2,m)



##############################################
##############################################
### discretized beta

freqj<-tabulate(dati[,18],m)
stimaj<-optim(par=c(1,1),fn=lldbeta,freq=freqj,method="L-BFGS-B")
stimaj

# $par
# [1] 0.7724551 0.7604752
# 
# $value
# [1] 1334.696
# 
# $counts
# function gradient 
# 8        8 
# 
# $convergence
# [1] 0
# 
# $message
# [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

# alfa=beta


m<-8
freqj<-tabulate(ordinal,m)
alfak<-1
stimaj<-optim(par=alfak,fn=lldbeta2,freq=freqj,method="Brent",lower=0,upper=10)
stimaj

# $par
# [1] 0.766503
# 
# $value
# [1] 1334.746


####################################
## with covariate

dichostress<-ifelse(cov>=0.75,1,0)
Z<-cov

nuk<-c(0,0)
stima<-optim(par=nuk,fn=lldbcov,ordinal=ordinal,Z=Z,method="L-BFGS-B",hessian=TRUE)

stima


# 
# $par
# [1] -0.4175502  0.2401895
# 
# $value
# [1] 1334.281
# 
# $counts
# function gradient 
# 10       10 
# 
# $convergence
# [1] 0
# 
# $message
# [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
# 
# $par
# [1] -0.4175502  0.2401895
# 
# $value
# [1] 1334.281
# 
# $counts
# function gradient 
# 10       10 
# 
# $convergence
# [1] 0
# 
# $message
# [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"


varmat<-solve(stima$hessian)

wald<-stima$par/sqrt(diag(varmat))
wald

#################   [1] -2.4621320  0.9610949   ########## non significativo

BIC<--2*(stima$value) + 2*log(n )
# [1] 2681.507



################## alfa diverso da beta
cov<-dati$stress/100
dichostress<-ifelse(cov>=0.75,1,0)
Z<-cov

param<-c(0,0,0,0)
stima<-optim(par=param,fn=lldbcov2,ordinal=ordinal,Z1=Z,Z2=Z,method="L-BFGS-B",hessian=TRUE)

stima
# $par
# [1] -0.22474063 -0.04189254 -0.55312906  0.45296135
# 
# $value
# [1] 1331.515
# 
# $counts
# function gradient 
# 18       18 
# 
# $convergence
# [1] 0

varmat<-solve(stima$hessian)

wald<-stima$par/sqrt(diag(varmat))
wald
##  [1] -1.1918354 -0.1505698 -2.9896927  1.6624901   ################# non significativo

BIC<-2*stima$value + 4*log(647)
#[1] 2688.919
