#######################################################################
## Social Network Analysis
## Master in 'Data Science per il Welfare'
## prof. Antonio Calcagnì (antonio.calcagni@unipd.it)
#######################################################################


## CONTENTS ###########################################################
# (A) Creating network graphs
# (B) Decorating network graphs
# (C) Importing network graphs
# (D) Visualizing network graphs
#######################################################################


# Initial settings --------------------------------------------------------
# Start from a clean session: remove every object from the workspace and close all open graphics devices.
rm(list=ls()); graphics.off()
# Working directory: the relative paths "labs/data/..." used in section (C) are resolved against it.
setwd("~/MEGA/Lavoro_sync/Didattica/2023_2024/socNetwork_analysis/") # change this according to your local path!
library(igraph) # graph constructors, accessors, import and layout functions used throughout the script


# (A) Creating network graphs ---------------------------------------------

# The most direct way to obtain an undirected graph is to write it out by hand,
# listing its edges (and hence its vertices) literally; '--' is an undirected edge:
g0 <- graph_from_literal(
  1--2, 1--5, 2--3,
  2--4, 4--5, 1--3
)
x11()
plot(g0)

# The same applies to directed graphs: '-+' points to the right-hand vertex,
# '+-' points to the left-hand vertex and '++' creates a mutual (two-way) edge:
g0 <- graph_from_literal(
  1-+2, 1-+5, 2+-3,
  2++4, 4-+5, 1-+3
)
x11()
plot(g0)

# Another example, this time with letters as vertex names:
g0 <- graph_from_literal(
  A-+B, A-+C, D-+E,
  F+-E, B-+C, C--E
)
x11()
plot(g0)

# Summarizing a graph is quite easy with the 'igraph' library:
g0 <- graph_from_literal(1--2, 1--5, 2--3, 2--4, 4--5, 1--3)
print_all(g0)
V(g0) # vertices
length(V(g0))

E(g0) # edges
length(E(g0))

# A graph is modified by adding/removing vertices and edges:
g0 <- g0 + vertices("A", "B") # add two new vertices (not connected)
x11(); plot(g0)

# c(1,"A",...) is coerced to a character vector, so the endpoints are matched by vertex NAME
g0 <- g0 + edges(c(1, "A", 2, "A", 3, "B")) # add new edges between old and new vertices
x11(); plot(g0)

# Remove the previously defined edges 1--2 and 1--5.
# NOTE: 'edges' expects edge IDs (or "from|to" strings), NOT a flat vector of endpoints:
# edges = c(1,2,1,5) would delete the edges whose IDs are 1, 2 and 5 instead.
g0 <- delete_edges(graph = g0, edges = c("1|2", "1|5"))
x11(); plot(g0)

# Alternative: first retrieve the IDs of the edges 1--A and 3--B, then remove them by ID
iid <- get.edge.ids(g0, c(1, "A", 3, "B"))
g0 <- delete_edges(g0, edges = iid)
x11(); plot(g0)

# The 'igraph' library includes many other functions to construct a graph.
# Some are deterministic, that is to say they produce the same graph every single time.
# For instance:
g1 <- make_tree(n = 10, children = 2, mode = "undirected") # a tree with 10 vertices where each vertex has up to 2 children
g2 <- make_full_graph(n = 10) # a fully connected (complete) graph
g3 <- make_ring(n = 10) # a ring-type graph
g4 <- make_star(n = 10, mode = "undirected") # a star-type graph

# Show the four graphs side by side on a 2x2 grid of panels
x11()
par(mfrow = c(2, 2))
plot(g1, main = "tree"); plot(g2, main = "full")
plot(g3, main = "ring"); plot(g4, main = "star")

# Other functions generate graphs stochastically: unless the random seed is fixed,
# every call produces a different graph. For instance, in a geometric random graph
# two vertices are joined by an edge when they lie within 'radius' of each other:
g2 <- sample_grg(nodes = 10, radius = 0.1) # small radius
x11()
plot(g2)

g3 <- sample_grg(nodes = 10, radius = 0.9) # large radius
x11()
plot(g3)

# Two independent draws with identical settings, compared for isomorphism.
# Two graphs are isomorphic if they have the same number of vertices and edges and
# there is a one-to-one correspondence between their vertices that preserves the edges,
# that is to say, they are connected in the same way.
g4a <- sample_grg(nodes = 10, radius = 0.25)
g4b <- sample_grg(nodes = 10, radius = 0.25)
isomorphic(g4a, g4b)

# Attach (random) weights to the edges of a graph
set.seed(112)
g5 <- sample_grg(nodes = 10, radius = 1)
E(g5)$weight <- runif(ecount(g5), min = 3, max = 5)



# (B) Decorating network graphs -------------------------------------------

# Start from a small random geometric graph (seeded, hence reproducible)
set.seed(121)
g0 <- sample_grg(nodes = 5, radius = 0.5)
x11()
plot(g0)

# Vertex and edge attributes (labels, colors, ...) are added or changed through the
# attribute lists exposed by V() and E():
V(g0)$name <- c("Bob", "Alice", "Sally", "Mary", "Tom") # rename the vertices
V(g0)$sex <- c("M", "F", "F", "F", "M") # attach a new vertex attribute
V(g0)$color <- ifelse(V(g0)$sex == "M", "gray", "yellow") # vertex colors stored as a vertex attribute

x11()
plot(
  g0,
  vertex.label.font = 2,
  vertex.size = 30,
  edge.arrow.size = 2
)
# Many other plot properties are listed here: https://r.igraph.org/articles/igraph.html#layouts-and-plotting

# Random edge weights, shown in turn as edge labels, edge widths and edge colors
E(g0)$weight <- runif(ecount(g0), min = 3, max = 6)
x11()
plot(g0, edge.label = round(E(g0)$weight, 2))
x11()
plot(g0, edge.width = E(g0)$weight)
x11()
plot(
  g0,
  edge.color = ifelse(E(g0)$weight < mean(E(g0)$weight), "gray", "lightblue"),
  edge.width = 2
)



# (C) Importing network graphs --------------------------------------------

# External graphs can be imported with the 'igraph' library from several formats.
# The most common ones are:
#
# Adjacency matrix: a square matrix whose element in row i and column j indicates the presence of an edge between vertices i and j.
# Edge list: a list of edges, where each edge is a pair of vertices.
# Adjacency list: a list of lists, where the i-th list contains the neighbors of vertex i.
# GraphML: a comprehensive XML-based format that can store both the graph structure and its attributes.
# GML (Graph Modelling Language): a hierarchical ASCII-based file format for describing graphs.
# Pajek format: a format for large networks used by the Pajek software.
# For further details, see https://r.igraph.org/reference/read.graph.html

## Unweighted graph from an external adjacency matrix
A <- read.csv(file = "labs/data/lab0_ex1.dat", header = TRUE)
A <- as.matrix(A[, -1]) # the first column is dropped, the remaining ones form the matrix
dimnames(A) <- list(LETTERS[1:NROW(A)], LETTERS[1:NROW(A)]) # label rows and columns A, B, C, ...
print(A)

gA <- graph_from_adjacency_matrix(
  adjmatrix = A,
  mode = "undirected",
  weighted = NULL,
  diag = FALSE
)
x11()
plot(gA)

## Weighted graph from an external (weighted) adjacency matrix
A <- read.csv(file = "labs/data/lab0_ex2.dat", header = FALSE)
A <- as.matrix(A)
dimnames(A) <- list(LETTERS[1:NROW(A)], LETTERS[1:NROW(A)]) # label rows and columns A, B, C, ...
print(A)

gA <- graph_from_adjacency_matrix(
  adjmatrix = A,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)
x11()
plot(
  gA,
  edge.color = ifelse(E(gA)$weight < 3, "lightblue", "gray"), # light edges in blue, the others in gray
  edge.width = 2
)

## Graph stored in an XML-based file (i.e., GraphML)
gxml <- read_graph(file = "labs/data/lab0_ex3.xml", format = "graphml")
x11()
plot(gxml, vertex.size = 1.5)

## Graph stored as an edge list
gedl <- read_graph(file = "labs/data/lab0_ex4.dat", format = "edgelist")
x11()
plot(gedl, vertex.size = 1.5)



# (D) Visualizing network graphs ------------------------------------------

# Generate a (reproducible) random graph with 25 vertices
set.seed(1231)
g0 <- sample_grg(nodes = 25, radius = 0.35)

# By default, plot() relies on an automatic procedure that picks a layout algorithm
# according to the size and connectedness of the graph.
x11()
plot(g0) # automatic layout algorithm

# Several other algorithms are available to visualize a network graph. Their differences
# can be appreciated especially in the case of very large networks.

# Some very simple layouts first:
g0_l1 <- layout_as_star(graph = g0, center = 10) # star, centered on vertex 10
g0_l2 <- layout_as_tree(graph = g0, root = 10) # tree, rooted at vertex 10
g0_l3 <- layout_in_circle(graph = g0) # circle
g0_l4 <- layout_on_grid(graph = g0) # grid

# One panel per layout on a 2x2 grid
x11()
par(mfrow = c(2, 2))
plot(g0, layout = g0_l1, main = "star")
plot(g0, layout = g0_l2, main = "tree")
plot(g0, layout = g0_l3, main = "circle")
plot(g0, layout = g0_l4, main = "grid")

# Other methods implement some theoretically-based procedures. For instance:

# Fruchterman-Reingold algorithm:
# It is a force-directed method for graph layout that positions nodes based on physical forces.
# Nodes repel each other, while edges act like springs pulling connected nodes together.
# The algorithm iteratively adjusts node positions to minimize energy, creating an aesthetically pleasing and well-distributed layout.
g0_lfr <- layout_with_fr(g0)

# Kamada-Kawai algorithm:
# It is a force-directed method for graph layout. It treats the graph as a system of springs, where each pair of nodes has an ideal distance
# based on their shortest path. The algorithm iteratively adjusts node positions to minimize the energy of the system,
# producing a visually balanced layout.
g0_lkk <- layout_with_kk(g0)

# Davidson-Harel algorithm:
# It is a force-directed method for graph layout. It uses simulated annealing to optimize node positions,
# reducing edge crossings and uniformly distributing nodes.
g0_ldh <- layout_with_dh(g0)

# GEM (Graph Embedder) algorithm:
# It is another force-directed method for graph layout. It is designed to provide a visually appealing arrangement of nodes
# by minimizing edge crossings and distributing nodes evenly.
g0_lgem <- layout_with_gem(g0)

# Plot all the layouts together.
# Each panel must receive its own precomputed coordinates through 'layout=':
# without it, plot() falls back to the automatic layout in every panel.
x11()
par(mfrow = c(2, 2))
plot(g0, layout = g0_lfr, main = "FR"); plot(g0, layout = g0_lkk, main = "KK")
plot(g0, layout = g0_ldh, main = "DH"); plot(g0, layout = g0_lgem, main = "GEM")

# Some key differences among these methods:
# Fruchterman-Reingold: Simple and effective for medium-sized graphs, balances node distribution.
# Kamada-Kawai: Focuses on preserving graph-theoretic distances, ideal for smaller graphs.
# Davidson-Harel: Uses simulated annealing for better optimization, suitable for complex graphs.
# GEM: Fast and efficient, handles large graphs well with heuristic optimizations.

# How to choose among them:
# Start with a basic algorithm such as Fruchterman-Reingold or Kamada-Kawai for general purposes.
# If the resulting layout doesn't meet your requirements (e.g., too many edge crossings), experiment with algorithms such as Davidson-Harel or GEM for better optimization.

# Another popular option is the MDS-based layout:
# multidimensional scaling places the nodes in a low-dimensional space according to their
# pairwise distances, so that the graph structure is preserved in the reduced-dimensional picture.
x11()
plot(g0, layout = layout_with_mds(g0))





