library(igraph)
library(networkdata)
library(netUtils)
Social Network Analysis
Worksheet 3: Network Concepts and Descriptives I
Load Packages
To start off…
Load the following two datasets
# Load both tailor datasets with a single data() call
data(list = c("tailor_social", "tailor_work"))
Which of the two networks is directed and how can you verify this?
Solution
Show solution
# Option 1: print both graphs and look for `--` vs `->` edges, or for the
# "UN" / "DN" code in the printed summary
print(tailor_social)
print(tailor_work)

# Option 2: str() from netUtils prints the type explicitly
str(tailor_social)
str(tailor_work)

# Option 3: ask igraph directly
print(is_directed(tailor_social))
print(is_directed(tailor_work))
Exercise: Reading network data
- load all network files in `data.zip` into R and create appropriate igraph objects
- calculate the density of the networks
- calculate local and global transitivity (if possible)
- check if the global transitivity differs from a random graph (see slide 60; choose one network or all)
Solution
Show solution
# net1: space-separated file; the first column is used as row names
tab <- read.csv("data/net1.csv", header = TRUE, row.names = 1, sep = " ")
class(tab)

# read.csv() returns a data.frame; igraph needs a matrix here
A <- as.matrix(tab)

# With default arguments the matrix is treated as directed and unweighted
graph_from_adjacency_matrix(A)

# check if the matrix is symmetric
A == t(A)

# The undirected mode is only appropriate if the check above shows a
# symmetric matrix. Cell values become edge weights; the diagonal is ignored.
net1 <- graph_from_adjacency_matrix(
  A,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)
# net2: comma-separated file, read as an edge list
tab <- read.csv("data/net2.csv", header = TRUE, sep = ",")

# Kept from the original (commented out): passing the data.frame directly
# graph_from_edgelist(tab)

# graph_from_edgelist() expects a two-column matrix, so convert first
el <- as.matrix(tab)
net2 <- graph_from_edgelist(el, directed = FALSE)
# net3: tab-separated file; columns 1-2 are used as the edge list and
# column 3 as the edge weight
tab <- read.csv("data/net3.csv", header = TRUE, sep = "\t")
el <- as.matrix(tab)

# Only the first two columns define the edges
net3 <- graph_from_edgelist(el[, 1:2], directed = FALSE)

# Two equivalent ways to attach the third column as edge weight.
# The weights come from `tab`, not from `el`.
E(net3)$weight <- tab[, 3]
net3 <- set_edge_attr(net3, name = "weight", value = tab[, 3])
# net4: comma-separated file; the first column is used as row names
tab <- read.csv("data/net4.csv", header = TRUE, row.names = 1, sep = ",")
class(tab)

# read.csv() returns a data.frame; igraph needs a matrix here
A <- as.matrix(tab)

# check if the matrix is symmetric
A == t(A)

# Built as a directed, weighted graph; the diagonal is ignored
net4 <- graph_from_adjacency_matrix(
  A,
  mode = "directed",
  weighted = TRUE,
  diag = FALSE
)
# net5: tab-separated file; the first column is used as row names
tab <- read.csv("data/net5.csv", header = TRUE, row.names = 1, sep = "\t")
class(tab)

# read.csv() returns a data.frame; igraph needs a matrix here
A <- as.matrix(tab)

# check if the matrix is symmetric
A == t(A)

# Built as a directed, weighted graph; the diagonal is ignored
net5 <- graph_from_adjacency_matrix(
  A,
  mode = "directed",
  weighted = TRUE,
  diag = FALSE
)
Netreader Addin
(Interactive Session)
Exercise: Create networks
Try to create igraph objects for the following networks:
Choose one network (you can also do more) and do the following:
- add `names` as a node attribute
- add an edge attribute `weight` and assign it values of your choice
- find a way to extract the edgelist and the adjacency matrix of the networks
Solution
Show solution
library(patchwork)
library(ggraph)

# Network 1: complete graph on five vertices labelled A-E
g1 <- make_full_graph(5)
V(g1)$name <- LETTERS[1:5]
# g1 <- graph_from_literal(A:B:C:D:E -- A:B:C:D:E)
p1 <- ggraph(g1, "stress") +
  geom_edge_link0() +
  geom_node_point(shape = 21, fill = "grey66", size = 10) +
  geom_node_text(aes(label = name), size = 7)

# Network 2: path A -> B -> C (make_graph() is directed by default)
g2 <- make_graph(edges = c("A", "B", "B", "C"))
# g2 <- graph_from_literal( A -+ B -+ C )
# g2 <- make_ring(3,directed = TRUE,circular = FALSE)
p2 <- ggraph(g2, "stress") +
  geom_edge_link(
    arrow = arrow(
      angle = 15,
      length = unit(0.15, "inches"),
      ends = "last",
      type = "closed"
    ),
    end_cap = circle(12, "pt"),
    n = 2
  ) +
  geom_node_point(shape = 21, fill = "grey66", size = 10) +
  geom_node_text(aes(label = name), size = 7)

# Network 3: undirected star on five vertices with vertex 3 as the centre
g3 <- make_star(5, "undirected", center = 3)
V(g3)$name <- 1:5
p3 <- ggraph(g3, "stress") +
  geom_edge_link0() +
  geom_node_point(shape = 21, fill = "grey66", size = 10) +
  geom_node_text(aes(label = name), size = 7)

# Combine the three plots with patchwork
p1 + p2 + p3
Structural features
Exercise: Degree Distribution
# Preferential attachment graph with 1500 vertices
pa <- sample_pa(1500, power = 1.5, m = 3, directed = FALSE)
# G(n, p) random graph with 1500 vertices
er <- sample_gnp(1500, p = 0.003)
- Verify that the networks have approximately the same density
- Plot the degree distributions of both networks and check for skewness
- calculate the degrees (`degree()`) and order the sequence decreasingly (`order()`)
- For both networks, create a loop (i = 1, …, 50) which deletes the top i vertices (`delete_vertices()`) in the ordered degree sequence and compute the diameter of the resulting networks
- What can you observe?
Solution
Show solution
# calculate the density (edge_density is the same as graph.density)
edge_density(pa)
edge_density(er)

# for the preferential attachment network,
# we plot the degree distribution on a log-log scale
# a straight-ish line should appear
plot(degree_distribution(pa), log = "xy")
plot(degree_distribution(er))

# order() returns vertex indices sorted by decreasing degree,
# i.e. these are vertex ids, not degree values
deg_er <- order(degree(er), decreasing = TRUE)
deg_pa <- order(degree(pa), decreasing = TRUE)

# iterate through the top 50 nodes, delete them and calculate the diameter
res_er <- rep(0, 50)
res_pa <- rep(0, 50)

for (i in seq_len(50)) {
  # always delete from the original graphs, so step i removes the
  # i highest-degree vertices at once
  g1 <- delete_vertices(er, deg_er[seq_len(i)])
  g2 <- delete_vertices(pa, deg_pa[seq_len(i)])
  res_er[i] <- diameter(g1)
  res_pa[i] <- diameter(g2)
}

# red: preferential attachment, black: random graph
plot(res_pa, type = "l", col = "red", ylab = "diameter")
lines(res_er, col = "black")
You’ll notice that the diameter increases very quickly for a preferential attachment network. The diameter of the random network, on the other hand, remains constant. If you redo the analysis for the preferential attachment network but remove random nodes instead, you’ll notice that the diameter also remains constant. A preferential attachment network is thus said to be fragile to targeted attacks but robust to random attacks.
Exercise: Density, Transitivity, Distances
Real-world networks tend to have a high transitivity (`transitivity(type="global")`), a low average distance (`mean_distance()`) and a low density (`graph.density()`).
- Experiment with `sample_pa()` and `sample_gnp()` and compute the three stats. Can all three criteria be fulfilled? Why/why not?
Hint
# Parameters
n <- 1000 # number of nodes
m <- 5 # edges per new node in PA
p <- 0.01 # connection probability for GNP

# Preferential Attachment
g_pa <- sample_pa(n, power = 1, m = m, directed = FALSE)

# Erdős–Rényi (GNP)
g_gnp <- sample_gnp(n, p, directed = FALSE)
# Compute statistics
#' Summarise a graph by transitivity, mean distance and density
#'
#' @param graph An igraph graph object.
#' @return A named numeric vector with elements `transitivity` (global
#'   transitivity), `mean_distance` and `density`.
compute_stats <- function(graph) {
  trans <- transitivity(graph, type = "global")
  # unconnected = TRUE: average only over vertex pairs joined by a path
  dist <- mean_distance(graph, directed = FALSE, unconnected = TRUE)
  dens <- edge_density(graph)
  c(transitivity = trans, mean_distance = dist, density = dens)
}
# Get stats
stats_pa <- compute_stats(g_pa)
stats_gnp <- compute_stats(g_gnp)

# Combine into a data frame: one row per model, one column per statistic.
# `[[` extracts the bare value, so the columns are built from unnamed numbers.
results <- data.frame(
  Model = c("Preferential Attachment", "Erdős–Rényi (GNP)"),
  Transitivity = c(
    stats_pa[["transitivity"]],
    stats_gnp[["transitivity"]]
  ),
  Mean_Distance = c(
    stats_pa[["mean_distance"]],
    stats_gnp[["mean_distance"]]
  ),
  Density = c(stats_pa[["density"]], stats_gnp[["density"]])
)
results