#######################################################################
## Analiza danych niepewnych: Introduction to fuzzy statistics
## Prof. Antonio Calcagnì (antonio.calcagni@unipd.it)
#######################################################################


## CONTENTS ###########################################################
# (A) Dataset and first manipulations
# (B) Fuzzy regression models
# (C) Bootstrap applications (A few of)
#######################################################################


# Initial settings --------------------------------------------------------
rm(list=ls()); graphics.off()
setwd("~/MEGA/Lavoro_sync/Didattica/2024_2025/ka131/lab/") #change it according to your local path!
library(SAFD); library(FuzzyNumbers); library(fuzzyreg)
source("utilities.R")



# (A) Dataset and first manipulations -------------------------------------
load("data_restaurant.RData")
str(data_restaurant)
# Data were originally collected by de Sáa et al. (2014) and refers to a survey of 14 items administered to a 
# sample of n = 70 customers of different age, background, and occupation. The goals was to assess the perceived quality of restaurants.
# See: de Sáa, S. D. L. R., Gil, M. Á., González-Rodríguez, G., López, M. T., & Lubiano, M. A. (2014). Fuzzy rating scale-based questionnaires and their statistical analysis. IEEE Transactions on Fuzzy Systems, 23(1), 111-126.
# The dataset contains trapezoidal fuzzy numbers with the following parameters: 
# "_inf0" -> Infima of the 0-cut of the fuzzy set selected
# "_inf1" -> Infima of the 1-cut of the fuzzy set selected
# "_sup0" -> Infima of the 1-cut of the fuzzy set selected
# "_sup1" -> Suprema of the 1-cut of the fuzzy set selected
# The suffix QF, QR, QP stand for three different evaluations (ie, foods, restaurant/service, price) whereas the numbers indicate the variable/item of the questionnaire.
n <- NROW(data_restaurant)


#### Compute social indices ####

# Computing the QF variable as average over QF-items
datax_QF <- array(data = NA,dim = c(n,4,5)) #store QF data into a 'units x trpz paramaters x num of QF items' array
for(j in 1:5){
  datax_QF[,,j] <- as.matrix(data_restaurant[,grep(x = colnames(data_restaurant),pattern <- paste0("QF",j))])
}
qf <- list()
for(i in 1:n){ #fuzzy mean 
  s <- TrapezoidalFuzzyNumber(a1 = datax_QF[i,1,1],a4 = datax_QF[i,4,1],a2 = datax_QF[i,2,1],a3 = datax_QF[i,3,1])
  for(j in 2:5){
    s <- s + TrapezoidalFuzzyNumber(a1 = datax_QF[i,1,j],a4 = datax_QF[i,4,j],a2 = datax_QF[i,2,j],a3 = datax_QF[i,3,j])
  }
  qf[[i]] <- 1/5 * s
}

# Computing the QR variable as average over QR-items
datax_QR <- array(data = NA,dim = c(n,4,8)) #store QR data into a 'units x trpz paramaters x num of QR items' array 
for(j in 1:8){
  datax_QR[,,j] <- as.matrix(data_restaurant[,grep(x = colnames(data_restaurant),pattern = paste0("QR",j))])
}
qr <- list()
for(i in 1:n){ #fuzzy mean
  s <- TrapezoidalFuzzyNumber(a1 = datax_QR[i,1,1],a4 = datax_QR[i,4,1],a2 = datax_QR[i,2,1],a3 = datax_QR[i,3,1])
  for(j in 2:8){
    s <- s + TrapezoidalFuzzyNumber(a1 = datax_QR[i,1,j],a4 = datax_QR[i,4,j],a2 = datax_QR[i,2,j],a3 = datax_QR[i,3,j])
  }
  qr[[i]] <- 1/8 * s
}

# Computing the QP variable as average over QP-items (single item)
datax_QP <- array(data = NA,dim = c(n,4)) #store data into a 'units x trpz paramaters' array
datax_QP <- as.matrix(data_restaurant[,grep(x = colnames(data_restaurant),pattern = paste0("QP",1))])
qp <- list()
for(i in 1:n){ #fuzzy mean
  qp[[i]]=TrapezoidalFuzzyNumber(a1 = datax_QP[i,1],a4 = datax_QP[i,4],a2 = datax_QP[i,2],a3 = datax_QP[i,3])
}


#### Compute additional variables ####

# Duration of dinners (eg, it can be used as predictor)
data_restaurant$time_sup[6] <- "24:30" #fix a possible coding error
elapsTime <- difftime(strptime(data_restaurant$time_sup,format="%H:%S"),strptime(data_restaurant$time_inf,format="%H:%S"),units <- "mins")

# Extracting fuzzy quantities in LR forms for the QR variable (eg, it can be used as response variable)
qr_leftS <- qr_rightS <- qr_core1 <- qr_core2 <- rep(NA,n)
for(i in 1:n){
  qr_core1[i] <- core(qr[[i]])[1]                #trpz parameter: a2
  qr_core2[i] <- core(qr[[i]])[2]                #trpz parameter: a3
  qr_leftS[i] <- qr_core1[i]-supp(qr[[i]])[1]    #trpz parameter: a1
  qr_rightS[i] <- qr_core2[i]+supp(qr[[i]])[2]   #trpz parameter: a4
}

# Summary measures for some variables (eg, they can be used as predictors)
qr_defuzz <- unlist(lapply(qr,value))            
qp_defuzz <- unlist(lapply(qp,value))
qp_amb <- unlist(lapply(qp,ambiguity)) 


#### Create the final dataset for regression analysis ####
data_restaurant$restaurant_type2 <- rep(1,n) #recode restaurants into two disjoint levels
data_restaurant$restaurant_type2[data_restaurant$restaurant_type=="Self-service restaurant" | data_restaurant$restaurant_type=="Fast food restaurant"] <- 0

data_reg <- data.frame(qr=qr_defuzz,time=as.numeric(elapsTime),sex=as.factor(data_restaurant$sex),
                      type=as.factor(data_restaurant$restaurant_type2),
                      qp=qp_defuzz,qp_unc=qp_amb,
                      qr_core1=qr_core1,qr_core2=qr_core2,qr_leftS=qr_leftS,qr_rightS=qr_rightS,
                      qr_coreAvg=(qr_core1+qr_core2)/2)

data_reg <- data_reg[complete.cases(data_reg),] #remove NAs
str(data_reg)

#load("data_restaurant2.RData")


# (B) Fuzzy regression models ---------------------------------------------

# For the sake of simplicity, consider the simplest case of predicting 'qr' as a function of 'qp_unc'
# Note: 'qp_unc' can be interpreted as the uncertainty in assessing the 'quality of price' of a given restaurant


#### Fit a (standard) Normal linear model ####
out_lm <- lm(formula = qr~qp_unc,data = data_reg)
summary(out_lm)
x11();plot(effects::allEffects(out_lm)) #alternatively: car::avPlots(out_lm)


#### Fit a Normal linear model via fuzzy maximum likelihood
#X <- cbind(1,data_reg$sex)
X <- model.matrix(~data_reg$qp_unc)
J <- NCOL(X)
n <- nrow(data_reg)

xpar <- c(80.0,-0.5,10.0) #starting points
res <- optim(fn = likelFun,par = xpar,method = "L-BFGS-B",lower = c(rep(-Inf,J),1e-9), control = list(trace=3,fnscale=-10,maxit=10,pgtol = 1e-4),
      m1=data_reg$qr_core1,m2=data_reg$qr_core2,l=data_reg$qr_leftS,r=data_reg$qr_rightS,X=X,hessian=TRUE)

res$par #regression coeffs
#sqrt(diag(solve(res$hessian))) #standard errors for the estimates (just in case)

#### Fit a fuzzy linear model using PLRS (Tanaka's approach) ####
out_plrs <- fuzzylm(formula = qr~qp_unc,data = data_reg,method = "plrls") #qr is crisp
summary(out_plrs)

# Note:
#
# GOF:
# The mean squared distance is based on Diamond's distance of two variables representing
# triangular fuzzy numbers, where one is the response variable and the other is the prediction from a fuzzy regression model.
# The Diamond's distance of two triangular fuzzy numbers is sum of squared differences of the core and both support values of the fuzzy numbers.
#
# TEF:
# The total error of fit of a fuzzy regression model is based on the difference in membership functions of triangular fuzzy numbers
# between the estimated and observed fuzzy dependent variables. 
# Not suitable for assessing models fitted on crisp input data.

# Plotting the results
x11();plot(out_plrs)
abline(out_lm,col=2)


#### Fit a fuzzy linear model using FLS (Diamond's approach based on fuzzy least squares)  ####
# Note: This method allows for a single predictor only.
#       Use the triangular version of 'qr' where qr_coreAvg is the midpoint of the core interval of the original trpz fuzzy variable 'qr'
out_fls <- fuzzylm(formula = qr_coreAvg~qp_unc,data = data_reg,fuzzy.left.y = "qr_leftS",fuzzy.right.y  = "qr_rightS", method = "fls") #qr is fuzzy here
summary(out_fls)
x11();plot(out_fls)


#### Fit a (interactive) fuzzy linear model based on least squares ####
#X <- cbind(1,data_reg$qp_unc)
J <- NCOL(X)-1

# Minimize the objective least squares function
res <- optim(fn = flr1,par = rep(1,J+3),X,m=data_reg$qr_coreAvg,l=data_reg$qr_leftS,r=data_reg$qr_rightS,X=X,J=J)
beta_est <- res$par
print(beta_est)

# Compute some fit indices and plots
Ypred <- flr1_predict(X,beta_est) #predict element-wise the 'qr' trg fuzzy variable
rss(m=data_reg$qr_coreAvg,l=data_reg$qr_leftS,r=data_reg$qr_rightS,mStar <- Ypred[,1],lStar <- Ypred[,2],rStar <- Ypred[,3])

x11();plot(0,0,xlim=c(-2,23),ylim=c(00,350),col=0,xlab="qp unc",ylab="fuzzy qr",bty="n")
rect(ybottom = data_reg$qr_coreAvg-data_reg$qr_leftS,ytop = data_reg$qr_coreAvg+data_reg$qr_rightS,
     xleft = data_reg$qp_unc,xright <- data_reg$qp_unc+1,
     lty=2,col="lightgray")
lines(data_reg$qp_unc,Ypred[,1],lwd=1.5)
lines(data_reg$qp_unc,Ypred[,1]-Ypred[,2],lwd=1.5)
lines(data_reg$qp_unc,Ypred[,1]+Ypred[,3],lwd=1.5)


#### Approximate the fuzzy regression problem using a functional regression approach ####
library(fda)
library(refund)

xsup <- seq(from = min(data_reg$qr_core1-data_reg$qr_leftS),to = max(data_reg$qr_core2+data_reg$qr_rightS),length=101) #overall support for all the fuzzy observations

# Compute the fuzzy membership function for all subjects on xsup
Mu_mat <- sapply(1:n,function(i)evaluate(TrapezoidalFuzzyNumber(a1 = data_reg$qr_core1[i]-data_reg$qr_leftS[i],a2 = data_reg$qr_core1[i],a3 = data_reg$qr_core2[i],a4 = data_reg$qr_core2[i]+data_reg$qr_rightS[i]),xsup))

# Approximate fuzzy membership functions with B-spline basis
basis <- create.bspline.basis(rangeval = c(min(xsup)-10,max(xsup)+10), nbasis = 100) # the number of basis functions
fda_obj <- fdPar(basis, Lfdobj = int2Lfd(2), lambda = 1e-2) #lambda is the tuning parameter (it needs to be choosen via CV)
fda_mu <- smooth.basis(xsup, Mu_mat, fda_obj)$fd

x11(); plot(fda_mu)

x11();plot(0,0,col=0,bty="n",xlim=c(min(xsup)-25,max(xsup)),ylim=c(0,1)); apply(Mu_mat,2,function(x)lines(xsup,x,lty=2))
lines(fda_mu)


# Define and fit a functional regression problem
# We aim at modeling the so-called function-on-scalar regression:
#                         xi_i(t) = β0(t) + β1(t)x_i1 + β2(t)x_i2 + ⋯ +ε_i(t)
# where xi_i(t) is the functional response/outcome (ie, the fuzzy observation) whereas x_1..x_J are the non-functional predictors.
# Note that regression coefficients are functions too (ie, functional regression coeffs) and so they need to be represented in terms of B-splines
# as done for the functional outcome.
fda_xsup <- seq(fda_mu$basis$rangeval[1], fda_mu$basis$rangeval[2], length.out = 101)
Y <- t(eval.fd(fdobj = fda_mu,evalarg = fda_xsup))

mod0 <- refund::pffr(formula = Y~1,yind = fda_xsup)
mod1 <- refund::pffr(formula = Y ~ sex,yind = fda_xsup,data = data_reg)
AIC(mod0,mod1)

summary(mod1)

# An example of predicted vs. observed (ie, smoothed, approximated) fuzzy observations
i <- 31;plot(Y[i,],type="l",bty="n"); lines(predict(mod1)[i,],col=2)



# (C) Bootstrap applications (A few of) -----------------------------------

# Note: The bootstrap procedures from the library FuzzyResampling require as input 
# matrices of observed fuzzy parameters (ie, each column is a parameter).

n <- NROW(data_reg)
Ydata <- matrix(data <- NA,nrow <- n,ncol <- 4) #consider the trg version of the fuzzy variable 'qr'
Ydata[,1] <- data_reg$qr_coreAvg - data_reg$qr_leftS   #left bounds
Ydata[,2] <- data_reg$qr_coreAvg                       #cores
Ydata[,3] <- data_reg$qr_coreAvg                       #cores
Ydata[,4] <- data_reg$qr_coreAvg + data_reg$qr_rightS  #right bounds

#### Standard Efron's bootstrap ####
# Note: The procedure works as the standard Efron's non-parametric bootstrap.
# It is equivalent to FuzzyResampling::ClassicalBootstrap.

# Fuzzy linear model using FLS 
set.seed(131)
B=1000
Beta_boot <- matrix(data <- NA,nrow <- B,ncol <- 3) #each column is a regression parameter (in this case, three) -- no intercept
for(b in 1:B){
  iid <- sample(x <- 1:n,size <- n,replace <- TRUE)
  Y_boot <- Ydata[iid,]
  data_boot <- data.frame(c=Y_boot[,2],l=Y_boot[,2]-Y_boot[,1],r=Y_boot[,2]+Y_boot[,4],qp_unc=data_reg$qp_unc)
  Beta_boot[b,] <- fuzzylm(formula <- c~qp_unc,data <- data_boot,fuzzy.left.y <- "l",fuzzy.right.y  <- "r", method <- "fls")$coef[2,]
  if(b%%100 == 0){print(b)}
}
summary(Beta_boot)
x11();par(mfrow=c(1,3));mapply(function(j){hist(Beta_boot[,j])},1:3)

beta_sd <- apply(Beta_boot,2,sd)
Beta_ci <- apply(Beta_boot,2,function(x)quantile(x,probs <- c(0.05/2, 1 - 0.05/2), type <- 1))

# (interactive) Fuzzy linear model based on least squares
set.seed(131)
B=1000
Beta_boot <- matrix(data <- NA,nrow <- B,ncol <- 4) #each column is a regression parameter (in this case, four)
for(b in 1:B){
  iid <- sample(x <- 1:n,size <- n,replace <- TRUE)
  Y_boot <- Ydata[iid,]
  Beta_boot[b,] <- optim(fn <- flr1,par <- rep(1,J+3),X,m=Y_boot[,2],l=Y_boot[,2]-Y_boot[,1],r=Y_boot[,2]+Y_boot[,4],X=X,J=J)$par
  if(b%%100 == 0){print(b)}
}
summary(Beta_boot)
x11();par(mfrow=c(1,4));mapply(function(j){hist(Beta_boot[,j])},1:4)

beta_sd <- apply(Beta_boot,2,sd) #approximated
Beta_ci <- apply(Beta_boot,2,function(x)quantile(x,probs <- c(0.05/2, 1 - 0.05/2), type <- 1)) #approximated


#### VAA bootstrap method ####
# See: ?FuzzyResampling::VAAMethod

# Fuzzy linear model using FLS 
set.seed(131)
B=1000
Beta_boot <- matrix(data <- NA,nrow <- B,ncol <- 3) #each column is a regression parameter (in this case, three) -- no intercept
for(b in 1:B){
  Y_boot <- VAAMethod(initialSample <- Ydata,b <- n)
  data_boot <- data.frame(c=Y_boot[,2],l=Y_boot[,2]-Y_boot[,1],r=Y_boot[,2]+Y_boot[,4],qp_unc=data_reg$qp_unc)
  Beta_boot[b,] <- fuzzylm(formula <- c~qp_unc,data <- data_boot,fuzzy.left.y <- "l",fuzzy.right.y  <- "r", method <- "fls")$coef[2,]
  if(b%%100 == 0){print(b)}
}
summary(Beta_boot)
x11();par(mfrow=c(1,3));mapply(function(j){hist(Beta_boot[,j])},1:3)

beta_sd <- apply(Beta_boot,2,sd) #approximated
Beta_ci <- apply(Beta_boot,2,function(x)quantile(x,probs <- c(0.05/2, 1 - 0.05/2), type <- 1)) #approximated


#### VAA bootstrap on theequality test of the Aumann-type mean ####
# This test checks the null hypothesis that the Aumann-type mean of the fuzzy numbers is equal to a given fuzzy number mu_0.
# Note: It returns the p-value of the one-sample test for the mean.
OneSampleCTest(initialSample <- Ydata,mu_0 <- c(78,83,83,190),numberOfSamples <- 1000,resamplingMethod <- "VAMethod") #the test does not get rejected 










