#######################################################################
## Social Network Analysis
## Master in 'Data Science per il Welfare'
## prof. Antonio Calcagnì (antonio.calcagni@unipd.it)
#######################################################################


## CONTENTS ###########################################################
# (A) Import the data
# (B) Visualizing the network graph
# (C) Node/Edge-level analysis
# (D) Network-level analysis
#######################################################################


# Initial settings --------------------------------------------------------
# Start from a clean session: empty the workspace and close all graphics devices
rm(list = ls())
graphics.off()
# Working directory of the course material: change it according to your local path!
setwd("~/MEGA/Lavoro_sync/Didattica/2023_2024/socNetwork_analysis/")
library(igraph)


# (A) Import the data -----------------------------------------------------

# Data description: 
# In a city network graph, 25 firms are split into two sectors: technology startups (A) and manufacturing companies (B). 
# The startups collaborate closely within their group, while the manufacturing firms do the same. 
# Node 10 serves as a crucial link between these sectors, facilitating partnerships and innovation exchange.

# The dataset is stored as an edge list (one edge per row).
gnet <- read_graph(file = "labs/data/lab1_ex1.dat", format = "edgelist")
print_all(gnet) # the graph has been built as a directed graph ("D")
gnet <- as.undirected(gnet) # convert it to an undirected graph
print_all(gnet)

# Add a few attributes we already know about the network structure
iid_A <- c(4, 7, 12, 15, 18, 22, 20, 23, 25) # technology startups
iid_B <- setdiff(seq_len(vcount(gnet)), iid_A) # manufacturing companies (all the others)

# Sector label of each vertex: "A" for the ids in iid_A, "B" for the ids in iid_B
x <- rep("B", vcount(gnet))
x[iid_A] <- "A"
V(gnet)$type <- x

# One colour per sector; vertex 10 (the link between the sectors) is drawn in gray
V(gnet)$color <- ifelse(V(gnet)$type == "A", "lightgreen", "lightblue")
V(gnet)$color[10] <- "gray"



# (B) Visualizing the network graph ---------------------------------------

# Fruchterman-Reingold (FR) layout: the common default choice
fr <- layout_with_fr(gnet)
x11()
plot(gnet, layout = fr, vertex.label.font = 2, vertex.size = 15)

# Star layout centred on vertex 10, to highlight the role of that vertex
st <- layout_as_star(gnet, center = 10)
x11()
plot(gnet, layout = st, vertex.label.font = 2, vertex.size = 15)



# (C) Node/Edge-level analysis --------------------------------------------

# Basic measures and checks
nv <- vcount(gnet) # number of vertices
ne <- ecount(gnet) # number of edges

# Check whether any loop or multiple edge exists
any_loop(gnet)
any_multiple(gnet)

# With no loops and no multi-edges the density measure can be computed directly
edge_density(graph = gnet)
# For comparison, the density of a fully connected graph on the same number of vertices:
edge_density(make_full_graph(nv))

# Degree
dv <- degree(graph = gnet)
sum(dv < 1) # number of isolated vertices (degree 0); zero means there are none
x11()
plot(table(dv), bty = "n", type = "h", lwd = 4)
# We notice that the degree distribution looks like a mixture of two sub-distributions.
# This can denote the presence of heterogeneous structures or communities within the network.
# There is no clear decay in the degree distribution.

# Neighborhood
# For each vertex, neighborhood() lists the vertices reachable within 'order' steps
# (the vertex itself is always included).
neighborhood(graph = gnet, order = 0) # each vertex alone
neighborhood(graph = gnet, order = 1) # each vertex plus its direct neighbours
neighborhood(graph = gnet, order = 2) # each vertex plus everything within two steps

# Size of the order-2 neighborhood of every vertex
x11()
plot(
  unlist(lapply(neighborhood(graph = gnet, order = 2), length)),
  type = "h", bty = "n", lwd = 4
)

# Cut-vertices (articulation points)
# A cut-vertex is a vertex whose removal increases the number of connected
# components of the graph: removing a single one disconnects a connected graph.
articulation_points(gnet)
# As expected, node 10 is a cut point.

# Average shortest-path length
mean_distance(gnet, directed = FALSE)

# Centrality: closeness, betweenness and eigenvector centrality of each vertex
cc <- closeness(graph = gnet, normalized = TRUE)
cb <- betweenness(graph = gnet, normalized = TRUE)
ci <- eigen_centrality(graph = gnet, scale = TRUE)$vector

# Collect the three measures, together with the sector label, in one table
C <- data.frame(type = V(gnet)$type, cc = cc, cb = cb, ci = ci)
print(C)

# Index of the most central vertex according to each measure
c(which.max(cc), which.max(cb), which.max(ci))

# A target ("bull's-eye") plot can be useful in this case.
# The 'sna' functions are called through their namespace (sna::) instead of
# attaching the package with library(sna): attaching it masks igraph's
# closeness()/betweenness(), and the packages it depends on (e.g. 'network')
# stay on the search path even after 'sna' is detached. With qualified calls
# nothing is attached, so no detach step is needed and igraph is left untouched.
x11(); par(mfrow = c(1, 3)) # three panels side by side on a new device
A <- as_adjacency_matrix(gnet, sparse = FALSE) # dense adjacency matrix of the igraph graph
g <- network::as.network.matrix(A) # from igraph to a 'network' object usable by sna
ecols <- rep("white", nv); ecols[10] <- "orange"; ecols[9] <- "red" # colorize some nodes
sna::gplot.target(
  dat = g, x = sna::closeness(g, gmode = "graph"),
  usearrows = FALSE, vertex.col = ecols, edge.col = "lightgray", main = "Closeness"
)
sna::gplot.target(
  dat = g, x = sna::betweenness(g, gmode = "graph"),
  usearrows = FALSE, vertex.col = ecols, edge.col = "lightgray", main = "Betweeness"
)
sna::gplot.target(
  dat = g, x = sna::evcent(g, gmode = "graph"),
  usearrows = FALSE, vertex.col = ecols, edge.col = "lightgray", main = "EigenC"
)

# Centrality measures can also be stored as vertex attributes used by plot():
# here the vertex size is made proportional to the normalized closeness.
V(gnet)$size <- 25 * closeness(gnet, normalized = TRUE)
x11()
plot(gnet, layout = fr, vertex.label.font = 2)

# Cliques and subgraphs
# Enumerate the maximal cliques once and reuse the result, so that the size
# vector 'out' and the list it indexes are guaranteed to refer to the same cliques.
mcl <- max_cliques(gnet)
# Number of vertices in each maximal clique; vapply() always returns an integer
# vector, even when no clique is found.
out <- vapply(mcl, length, integer(1))
table(out) # how many maximal cliques there are for each clique size

mcl3 <- mcl[out == 3] # maximal cliques made of exactly 3 vertices (triangles)
print(mcl3)

# Triangles can reveal small collaboration groups:
# paint in orange every vertex that belongs to a maximal 3-clique.
V(gnet)$color[unlist(mcl3)] <- "orange"
V(gnet)$size <- NA # reset the size of each vertex (vertex.size is passed to plot() instead)
x11()
plot(gnet, layout = fr, vertex.label.font = 2, vertex.size = 15)
# Most of the triangles are located in the technological area.
# Note: they are maximal in the sense that none of them is contained in a larger clique.

# We can also analyse the subgraph induced by the vertices of the maximal 3-cliques.
# The original vertex ids are stored in the 'name' attribute BEFORE the subgraph
# is extracted, so each vertex carries its own label along with it. Assigning
# unique(unlist(mcl3)) to the subgraph afterwards would label the vertices in
# clique-discovery order, which need not match the vertex order of the subgraph.
vids3 <- unique(unlist(mcl3)) # vertices belonging to at least one maximal 3-clique
gnet_ids <- gnet # working copy, so that 'gnet' itself does not gain a 'name' attribute
V(gnet_ids)$name <- seq_len(vcount(gnet_ids))
gnet3 <- induced_subgraph(graph = gnet_ids, vids = vids3)
fr3 <- layout_with_fr(gnet3)
x11(); plot(gnet3, layout = fr3, vertex.label.font = 1, vertex.size = 15)
# ...and compute the usual statistics on this subgraph.



# (D) Network-level analysis ----------------------------------------------

# Global and local transitivity (clustering coefficient)
transitivity(graph = gnet, type = "global")
# Local transitivity of vertices 9 and 10, the central nodes identified previously
transitivity(graph = gnet, type = "local", vids = c(9, 10))

# Clustering (community detection)
# First attempt: Louvain method
gnet_cl <- cluster_louvain(graph = gnet)
x11()
plot(gnet_cl, gnet, layout = fr, vertex.label.font = 2, vertex.size = 15)
modularity(gnet_cl)
# The clustering method identifies the two industrial areas A and B, and also
# identifies two clusters in area B (manufacturing). However, this last result
# does not seem really relevant.

# Let's try another way of clustering: fast greedy modularity optimization
gnet_cl2 <- cluster_fast_greedy(graph = gnet)
x11()
plot(gnet_cl2, gnet, layout = fr, vertex.label.font = 2, vertex.size = 15)
modularity(gnet_cl2)
# The solution does not change.

# In the fast greedy case, we can also visualize the dendrogram of the clustering result:
x11()
plot(as.dendrogram(gnet_cl2))






