 #######################################################################
## Analiza danych niepewnych: Introduction to fuzzy statistics
## Prof. Antonio Calcagnì (antonio.calcagni@unipd.it)
#######################################################################


## CONTENTS ###########################################################
# (A) Descriptive analyses on fuzzy numbers (univariate)
# (B) Descriptive analyses on fuzzy numbers (bivariate)
#######################################################################


# Initial settings --------------------------------------------------------
# Reset the session: remove every object from the global environment and close all open graphics devices.
rm(list=ls()); graphics.off()
# Set the working directory; the relative path passed to source() below is resolved against it.
setwd("~/MEGA/Lavoro_sync/Didattica/2024_2025/ka131/lab/") #change it according to your local path!
# Attach the two fuzzy-data packages whose functions are called unqualified throughout the script.
library(SAFD); library(FuzzyNumbers)
# Run the local file utilities.R from the working directory.
# NOTE(review): its contents are not visible here - presumably course helper functions; confirm what it defines.
source("utilities.R")

## Note: 
# In general, there is no strict compatibility among the existing R libraries for fuzzy data analysis. 
# Thus, before calling a certain function on a given sample of fuzzy numbers, the user should check whether the input is in the format required by that function.


# (A) Descriptive analyses on fuzzy numbers (univariate) ------------------

#### A first look at a fuzzy dataset from SAFD ####
# Load the example dataset shipped with SAFD and inspect its top-level structure.
data("Trees")
str(Trees, 1)
# The dataset is a list containing three datasets about the quality of three main species of trees in Asturias: Betulla (n=133), Quercus (n=109), Rowan (n=37)
# Each tree was assigned a trapezoidal fuzzy number that models the experts' subjective perceptions of the tree quality on a scale from 0 to 5.

# Work on the third species only.
datax <- Trees$species3
str(datax, 1)
# List containing as many elements as the number of statistical units. Each element is a dataframe containing two variables:
# 'x' the extrema of the alpha-cuts and 'alpha' the corresponding cut on the vertical axis
# In this case, the horizontal representation for a fuzzy number is used.

# If needed, one can generate a list where each element represents a fuzzy observation
# described on m alpha-levels (SAFD::translator).
n <- length(datax)
m <- 11
# Arguments are passed with `=`: writing `X <- ...` / `nl <- ...` inside the call would
# additionally create the global variables X and nl as a side effect.
# The result list is preallocated and indexed with seq_len() so that an empty sample is handled safely.
datax2 <- vector("list", n)
for (i in seq_len(n)) {
  datax2[[i]] <- translator(X = datax[[i]], nl = m)
}

# A univariate plot (vertical representation): the first observation opens the plot,
# the remaining ones are overlaid, each with its own colour index.
x11()
plot(datax2[[1]]$x, datax2[[1]]$alpha, type = "l", bty = "n", xlab = "", ylab = "csi", xlim = c(0, 6))
for (i in seq_len(n)[-1]) {
  lines(datax2[[i]]$x, datax2[[i]]$alpha, col = i)
}


#### From the library 'SAFD' to 'FuzzyNumbers' ####
# More info at: https://cran.r-project.org/web/packages/FuzzyNumbers/vignettes/FuzzyNumbersTutorial.pdf

# To get a fuzzy observation using the class provided by the library 'FuzzyNumbers', consider the FuzzyNumber() function and the i=1 observation:
obs1 <- datax2[[1]]
x0 <- obs1$x[obs1$alpha == 0] # support: x-values at alpha = 0 (x0[1] -> a1, x0[2] -> a4)
x1 <- obs1$x[obs1$alpha == 1] # core:    x-values at alpha = 1 (x1[1] -> a2, x1[2] -> a3)
# Generic constructor: the linear side functions given here reproduce a trapezoidal fuzzy number.
x <- FuzzyNumber(a1 = x0[1], a2 = x1[1], a3 = x1[2], a4 = x0[2],
                 left = function(x) { x }, right = function(x) { 1 - x }) # using trapezoidal fn
x11(); plot(x, bty = "n")
# Alternatively, use the dedicated constructor (this overwrites the previous 'x'):
x <- TrapezoidalFuzzyNumber(a1 = x0[1], a2 = x1[1], a3 = x1[2], a4 = x0[2])
x11(); plot(x)

# If fuzzy numbers are generated by providing lower/upper alpha-cut bound generators - ie, see lower= and upper= parameters of FuzzyNumber() - then
# the so defined number can also be used to derive alpha-cuts automatically. For example:
# (arguments are passed with `=`; `object <- x` / `alpha <- ...` inside the call would also create
# the global variables 'object' and 'alpha')
x_alpha <- alphacut(object = x, alpha = c(0.25, 0.5, 0.75))
print(x_alpha)

# Now, let's convert the previous dataset 'datax2' from SAFD:
# Build one TrapezoidalFuzzyNumber per observation of 'datax2'.
# The list is preallocated and indexed with seq_len(), so it is not re-grown at every
# iteration and an empty sample (n = 0) simply yields an empty list.
datax3 <- vector("list", n)
for (i in seq_len(n)) {
  obs <- datax2[[i]]
  x0 <- obs$x[obs$alpha == 0] # support (alpha = 0): a1, a4
  x1 <- obs$x[obs$alpha == 1] # core    (alpha = 1): a2, a3
  datax3[[i]] <- TrapezoidalFuzzyNumber(a1 = x0[1], a2 = x1[1], a3 = x1[2], a4 = x0[2])
}

class(datax3) # 'datax3' is a list of fuzzy numbers
str(datax3[[1]]) # each element consists of parameters (a1-a4), generating lower/upper functions, generating alpha-cut functions


#### Canonical quantities with FuzzyNumbers ####
# Given a fuzzy number represented using the standards of the class FuzzyNumber, one can compute several quantities. For instance:
# All quantities below are computed on the first fuzzy observation of the sample.
fn_first <- datax3[[1]]

supp(fn_first) # support
core(fn_first) # core
# Membership function (xi) evaluated on a user-defined grid of 101 points in [1.35, 1.75]
evaluate(fn_first, seq(from = 1.35, to = 1.75, length.out = 101))

# Other relevant quantities to summarize a fuzzy number
# See: Delgado, M., Vila, M. A., & Voxman, W. (1998). On a canonical representation of fuzzy numbers. Fuzzy sets and systems, 93(1), 125-135.
expectedInterval(fn_first) # expected interval (as integral over 0-1 of left/right generating functions)
expectedValue(fn_first)    # mean of the expected interval
value(fn_first)            # location of the fuzzy number
width(fn_first)            # width of the fuzzy number, i.e. diff(expectedInterval(datax3[[1]]))
ambiguity(fn_first)        # ambiguity (as a measure of vagueness, global/total spread)

#### Simple calculus with fuzzy numbers ####
# Note: the operators {+,-,*,/} work in this context if 'FuzzyNumbers' is properly loaded.

# Consider two fns and convert them as PLFNs
# Note: PLFNs are fuzzy numbers whose side generating functions and α-cut generators are piecewise linear functions.
# In this way, calculus gets easier.
# Operands: the first two observations, converted to piecewise linear fuzzy numbers.
a <- as.PiecewiseLinearFuzzyNumber(datax3[[1]])
b <- as.PiecewiseLinearFuzzyNumber(datax3[[2]])

# Open a new graphics device, draw the operands 'a' (colour 4) and 'b' (colour 2) over the
# horizontal range 'xlim', then overlay 'result' as a thick line in colour 1.
show_operation <- function(result, xlim) {
  x11()
  plot(a, col = 4, xlim = xlim, bty = "n")
  plot(b, col = 2, add = TRUE)
  plot(result, col = 1, lwd = 2, add = TRUE)
}

show_operation(a + b, xlim = c(0, 6))  # plot of sum

show_operation(a - b, xlim = c(-6, 4)) # plot of difference

show_operation(a * b, xlim = c(-1, 6)) # plot of product

show_operation(a / b, xlim = c(-1, 6)) # plot of quotient

# Apply transformations of fns via Ext Principle on a given fn
# First transformation: x -> sqrt(log(x^2)), applied to 'a' through fapply().
# Note: the result is stored in a variable called 'c', which is read again further down the script.
sqrt_log_square <- function(x) sqrt(log(x^2))
c <- fapply(a, sqrt_log_square)
x11()
plot(a, xlim = c(0, 4), bty = "n")
plot(c, col = 2, add = TRUE)

# Second transformation: x -> x^2 (equivalent to a^2); it replaces the previous 'c'.
square <- function(x) x^2
c <- fapply(a, square)
x11()
plot(a, xlim = c(0, 4), bty = "n")
plot(c, col = 2, add = TRUE)

# Comparison of fuzzy numbers (via Dubois & Prade's approach)
# DUBOIS, Didier and PRADE, Henri, 1983, Ranking Fuzzy Numbers in the Setting of Possibility Theory. Information Sciences. 1983. Vol. 30, no. 3, p. 183–224.
# Note: we are evaluating here logical relations like {<,<=,>,>=}

# First case: compare 'a' with a copy of itself shifted one unit to the right.
a_shift <- a + 1

x11(); plot(a, col = 4, xlim = c(0, 6), bty = "n"); plot(a_shift, col = 2, add = TRUE) # is a <= a+1 true?
possibilityUndervaluation(a, a_shift) # plausibility of the statement a <= a+1
necessityUndervaluation(a, a_shift)   # necessity of the statement a <= a+1

x11(); plot(a, col = 4, xlim = c(0, 6), bty = "n"); plot(a_shift, col = 2, add = TRUE) # is a >= a+1 true?
possibilityExceedance(a, a_shift) # plausibility of the statement a >= a+1
necessityExceedance(a, a_shift)   # necessity of the statement a >= a+1

# Note: In case of a complete dominance of a fuzzy set over another one, all the indices would be either 1 or 0
# In this case, both the statements a <= a+1 and a >= a+1 have no uncertainty

# Consider the following case instead: 'c' (the transformed version of 'a') against 'a'.
# The plot shows the same pair of fuzzy numbers that the four indices below are computed on.
# NOTE(review): the plot previously displayed 'b' as second curve while the indices used 'a';
# if 'b' was the intended term of comparison, replace 'a' with 'b' in the plot and in the four calls.
x11(); plot(c, col = 4, xlim = c(0, 6), bty = "n"); plot(a, col = 2, add = TRUE) # is c >= a true?
# Then,
possibilityUndervaluation(c, a) # c <= a
necessityUndervaluation(c, a)
possibilityExceedance(c, a) # c >= a
necessityExceedance(c, a)
# Note: In this case, results are not conclusive.


#### Summary statistics with SAFD ####
# Compute the mean of the sample datax3 via Minkowski addition and scalar multiplication
# Sum of all fuzzy observations, accumulated left to right with the `+` operator of FuzzyNumbers.
# Reduce() also covers the single-observation case, where a `2:n` loop would run backwards.
s <- Reduce(`+`, datax3)
x_mean <- 1/n * s # the multiplication is meaningful iff the library FuzzyNumbers is properly loaded
x_mean2 <- sc_mult(Msum(datax), 1/n) # using the library SAFD instead on the original dataset datax

# Similarly, the same result can be obtained by simply running:
print(SAFD::Mmean(datax))

# Graphically represent the fuzzy number for the sample mean:
# dashed lines = observations, thick line in colour 2 = fuzzy mean
x11(); plot(datax3[[1]], xlim = c(0, 6), bty = "n", lty = 2)
for (i in seq_len(n)[-1]) {
  plot(datax3[[i]], lty = 2, add = TRUE)
}
plot(x_mean, lwd = 2, col = 2, add = TRUE)

# Compute the variance of the sample datax via Expectation of the Bertoluzza distance between fuzzy numbers:
# average of the squared distances between each observation and the fuzzy mean (SAFD format).
# The accumulator has its own name so that it does not reuse the name of base::c().
sq_dist_sum <- 0
for (i in seq_len(n)) {
  sq_dist_sum <- sq_dist_sum + bertoluzza(datax[[i]], x_mean2)^2
}
x_var <- sq_dist_sum / n
print(x_var)

# Similarly:
print(SAFD::Bvar(datax))

# The mean of a fuzzy sample is a fuzzy number. In case, how can one compute the mean of the fuzzy sample represented as a scalar (single real number)?
# Three ways: 
#   (i) defuzzify each fuzzy observation (i.e., reduce a fuzzy number into a real quantity which needs to be representative of the fuzzy set) 
#       and then compute the sample mean
#  (ii) compute for each fuzzy observation the scalar quantity Val(A) and then compute the sample mean
# (iii) compute the fuzzy mean and then apply Val() of the resulting fuzzy number
# 
# Recall that Val(A) can be seen as a 'central value' that represents from a global point of view the value of the (ill-defined) magnitude that 
# the fuzzy number represents.

datax_defuzz <- SAFD::defuzzify(datax)             # (i)  defuzzify via SAFD
# vapply() guarantees one numeric value per observation (unlist(lapply()) would silently
# return a vector of a different length if any element were not a scalar).
datax3_vals <- vapply(datax3, value, numeric(1))   # (ii) defuzzify by computing VAL()

xm1 <- mean(datax_defuzz) # (i)   mean of the defuzzified observations via SAFD
xm2 <- mean(datax3_vals)  # (ii)  mean of the defuzzified observations via VAL()
xm3 <- value(x_mean)      # (iii) VAL() of the fuzzy mean
print(c(xm1, xm2, xm3))

# Observations (dashed), fuzzy mean (thick line, colour 2) and scalar mean (vertical dashed line).
x11()
plot(datax3[[1]], xlim = c(0, 6), bty = "n", lty = 2)
for (i in seq_len(n)[-1]) {
  plot(datax3[[i]], lty = 2, add = TRUE)
}
plot(x_mean, lwd = 2, col = 2, add = TRUE)
abline(v = xm3, lwd = 2, col = 2, lty = 2) # plot the scalar mean

# Let's do the same computation by using a larger random sample of fuzzy numbers.
# There are many ways to simulate random fuzzy numbers. Here, for the sake of simplicity, we consider the simplest
# function 'GeneratorNU()' from the FuzzyResampling library, which generates trpz fuzzy numbers.
# Note: 
# The function simulates the initial sample which consists of n trapezoidal fuzzy numbers. 
# The "true origin" of each fuzzy number is independently drawn from the normal distribution N (mu, sigma).
# Then increases of its core and support are independently generated from the uniform distributions U [0,a] and U [0,b]
library(FuzzyResampling)
# Simulate B trapezoidal fuzzy numbers.
# Arguments are passed with `=`: with `n <- B`, `a <- 3`, `b <- 3` inside the call the global sample
# size 'n' would be overwritten by 5000 and the fuzzy numbers 'a' and 'b' by the scalar 3.
# No seed is set, so the simulated sample differs from run to run.
B <- 5000
X <- GeneratorNU(n = B, mu = 20, sigma = 10, a = 3, b = 3)
head(X) # the simulated trpz fns are in the form of a Bx4 matrix (columns: a1-a4 parameters)

# Transform the matrix of simulated trpz fuzzy numbers into a list of objects of the class FuzzyNumber
X_trapz <- vector("list", B)
for (i in seq_len(B)) {
  X_trapz[[i]] <- TrapezoidalFuzzyNumber(a1 = X[i, 1], a2 = X[i, 2], a3 = X[i, 3], a4 = X[i, 4])
}

s <- Reduce(`+`, X_trapz) # compute the sum of trpz fns
X_trapz_mean <- 1/B * s   # compute the mean of trpz fns
print(X_trapz_mean)       # ..it's a fuzzy number

value(X_trapz_mean)                      # VAL() of the fuzzy mean
mean(vapply(X_trapz, value, numeric(1))) # mean of the VAL() applied on fuzzy obs


#### Frequencies and histograms ####
# Note: There is no general and accepted rule on how fuzzy frequencies need to be computed.
# Overall, one can compute frequencies of fuzzy numbers levelwise (via alpha-cuts) or considering
# the fuzzy cardinalities of overlapping fuzzy sets. Finally, one can also decide to compute frequencies/histograms on summary quantities.

# One of the proposals is that described in:
# Trutschnig, W. (2008). A strong consistency result for fuzzy relative frequencies interpreted as estimator for the fuzzy-valued probability. Fuzzy Sets and Systems, 159(3), 259-269.
# Note: 'DShistogram' follows the SAFD standards
# Histogram of the SAFD-format sample 'datax'. The argument is passed with `=`, so that no
# global variable 'XX' is created as a side effect of the call.
DShistogram(XX = datax)



# (B) Descriptive analyses on fuzzy numbers (bivariate) -------------------

# Consider the second dataset from the list Trees and make it comparable with the previous 'datax'
# Number of units shared by the two variables: the size of the third Trees dataset.
n <- length(Trees$species3)

# Convert one SAFD observation into a TrapezoidalFuzzyNumber: rows 1 and 4 of 'x' give the
# support (a1, a4), rows 2 and 3 give the core (a2, a3).
as_trapezoidal <- function(obs) {
  TrapezoidalFuzzyNumber(a1 = obs$x[1], a2 = obs$x[2], a3 = obs$x[3], a4 = obs$x[4])
}

# First variable: third Trees dataset. Second variable: second Trees dataset, first n units only.
# unname() keeps the results plain positional lists.
datax_x1 <- unname(lapply(Trees[[3]][seq_len(n)], as_trapezoidal))
datax_x2 <- unname(lapply(Trees[[2]][seq_len(n)], as_trapezoidal))


#### Bivariate plots ####

# Plotting alpha-cuts
# Draw, on an empty X1-X2 panel titled 'main', one dashed rectangle per unit: the horizontal
# side is the interval returned by cut_fun() on the X1 fuzzy number, the vertical side the
# interval returned on the X2 fuzzy number. Reads 'n', 'datax_x1' and 'datax_x2' from the
# calling environment; keeping the intervals local avoids overwriting the fuzzy numbers 'a' and 'b'.
draw_cut_rectangles <- function(cut_fun, main) {
  # col=0 hides the dummy point: the call only sets up the axes
  plot(0, 0, xlim = c(0, 7), ylim = c(0, 4), bty = "n", xlab = "X1", ylab = "X2", col = 0, main = main)
  for (i in seq_len(n)) {
    cut_x1 <- cut_fun(datax_x1[[i]])
    cut_x2 <- cut_fun(datax_x2[[i]])
    rect(xleft = cut_x1[1], xright = cut_x1[2], ybottom = cut_x2[1], ytop = cut_x2[2],
         lty = 2, lwd = 1.5, border = i)
  }
}

# Two side-by-side panels: supports on the left, cores on the right.
x11(); par(mfrow = c(1, 2))
draw_cut_rectangles(supp, main = "Support of fns")
draw_cut_rectangles(core, main = "Core of fns")
# In general, generic alpha-cuts can be represented instead of supports or cores by using: alphacut(datax_x1[[i]],0.5)

# Plotting fuzzy numbers using bubble plots and canonical quantities 
# Canonical quantities of each unit on the two variables (one number per fuzzy observation).
values_x1 <- vapply(datax_x1, value, numeric(1));     values_x2 <- vapply(datax_x2, value, numeric(1))     # values
ambigs_x1 <- vapply(datax_x1, ambiguity, numeric(1)); ambigs_x2 <- vapply(datax_x2, ambiguity, numeric(1)) # ambiguities

# Empty panel (col=0 hides the dummy point), then one circle per unit centred at (VAL(x1), VAL(x2)).
x11(); plot(0, 0, bty = "n", xlab = "X1", ylab = "X2", xlim = c(-2, 7), ylim = c(0.5, 3), col = 0)
for (i in seq_len(n)) {
  # Radius = 0.5 + average of the two ambiguities. The two values must be combined with c():
  # in mean(x, y) the second argument is 'trim', so the X2 ambiguity would be ignored.
  bubble_radius <- 0.5 + mean(c(ambigs_x1[i], ambigs_x2[i]))
  plotrix::draw.circle(x = values_x1[i], y = values_x2[i], radius = bubble_radius,
                       lty = 2, border = i, lwd = 1.5)
}


#### Bivariate statistics ####
# The Euclidean distance between fuzzy numbers can be used to measure fuzzy distances
# Pairwise dissimilarities between the n statistical units, each described by the pair of
# fuzzy numbers (X1, X2). For units i and j the two marginal Euclidean distances are combined as
#   sqrt( d(x1_i, x1_j)^2 + d(x2_i, x2_j)^2 ),
# which yields a symmetric matrix with zero diagonal, as required by as.dist(), hclust() and cmdscale().
# (Filling Dmat[i,j] with distance(datax_x1[[i]], datax_x2[[j]]) would instead give an asymmetric
# cross-distance table between the two variables, of which as.dist() keeps only the lower triangle.)
Dmat <- matrix(0, nrow = n, ncol = n)
for (i in seq_len(n)) {
  for (j in seq_len(i - 1)) { # lower triangle only; the value is mirrored to the upper one
    d_x1 <- distance(datax_x1[[i]], datax_x1[[j]])
    d_x2 <- distance(datax_x2[[i]], datax_x2[[j]])
    Dmat[i, j] <- Dmat[j, i] <- sqrt(d_x1^2 + d_x2^2)
  }
}
# The resulting matrix can then be used as input to standard statistical analysis like clustering, multidimensional scaling, etc.
# For instance:
d <- as.dist(Dmat)

# Hierarchical clustering (Ward's criterion)
hc <- hclust(d = d, method = "ward.D2")
x11(); plot(hc)

# Classical multidimensional scaling on two dimensions; units are displayed by their index
# (col=0 hides the points so that only the labels are visible)
md <- cmdscale(d = d, eig = TRUE, k = 2)
x11(); plot(md$points[, 1], md$points[, 2], xlab = "C1", ylab = "C2", col = 0, bty = "n")
text(md$points[, 1], md$points[, 2], labels = seq_len(n))

# Similarly to the variance, The Fuzzy Covariance based on Bertoluzza's distance can also be computed.
# Note: The 'Bcov()' function follows the SAFD standards
# Consider here the three variables stored in Trees, ie: Betulla (n=133), Quercus (n=109), Rowan (n=37)
# Covariance matrix among the datasets stored in Trees, computed with SAFD::Bcov() on the
# original SAFD-format data. The datasets have different sizes, so each one is truncated to
# the size of the smallest (n = 37 for the Trees data) to obtain samples of equal length.
n <- min(lengths(Trees))
n_vars <- length(Trees)
Cmat <- matrix(NA_real_, nrow = n_vars, ncol = n_vars)
for (i in seq_len(n_vars)) {
  for (j in seq_len(n_vars)) {
    Cmat[i, j] <- Bcov(Trees[[i]][seq_len(n)], Trees[[j]][seq_len(n)]) # using the original SAFD datasets
  }
}
print(Cmat)

# Rescale the covariance matrix to a correlation matrix and display its lower triangle.
Rmat <- cov2cor(Cmat)
x11(); corrplot::corrplot(corr = Rmat, type = "lower")
# Then, Cmat or Rmat can be used for further data analysis.









