Social Network Analysis

Worksheet 7: Network Visualization II

Author

Termeh Shafie

Load Packages

library(igraph)
library(graphlayouts)
library(ggraph)
library(networkdata)

Dynamic layouts

library(gganimate)
library(ggplot2)
library(patchwork)
# also install package 'gifski' to create gifs

# downloaded from https://www.stats.ox.ac.uk/~snijders/siena/siena_datasets.htm
data("s50")
s50

[[1]]
IGRAPH dd2aeb6 UN-- 50 74 -- 
+ attr: name (v/c), smoke (v/n)
+ edges from dd2aeb6 (vertex names):
 [1] V1 --V11 V1 --V14 V2 --V7  V2 --V11 V3 --V4  V3 --V9  V4 --V9  V5 --V32
 [9] V6 --V8  V7 --V12 V7 --V26 V7 --V42 V7 --V44 V10--V11 V10--V14 V10--V15
[17] V10--V33 V11--V14 V11--V15 V11--V16 V11--V19 V11--V30 V12--V42 V12--V44
[25] V15--V16 V17--V18 V17--V19 V17--V21 V17--V22 V17--V24 V18--V19 V18--V35
[33] V19--V24 V19--V26 V19--V30 V21--V22 V21--V24 V21--V31 V21--V32 V22--V24
[41] V22--V25 V22--V31 V22--V34 V22--V43 V23--V24 V25--V31 V25--V32 V26--V29
[49] V26--V30 V26--V44 V27--V28 V27--V29 V27--V30 V29--V30 V29--V33 V30--V33
[57] V31--V32 V31--V34 V31--V37 V32--V37 V34--V37 V36--V38 V36--V41 V38--V41
+ ... omitted several edges

[[2]]
IGRAPH 6e28c77 UN-- 50 81 -- 
+ attr: name (v/c), smoke (v/n)
+ edges from 6e28c77 (vertex names):
 [1] V1 --V10 V1 --V11 V1 --V14 V1 --V33 V2 --V26 V3 --V4  V3 --V9  V4 --V5 
 [9] V4 --V17 V4 --V34 V5 --V17 V6 --V8  V6 --V35 V7 --V26 V7 --V44 V10--V11
[17] V10--V14 V10--V33 V11--V14 V11--V19 V11--V26 V11--V30 V12--V15 V12--V26
[25] V12--V42 V12--V44 V15--V16 V15--V36 V15--V42 V16--V26 V16--V42 V16--V44
[33] V17--V22 V17--V24 V17--V27 V17--V32 V18--V35 V19--V21 V19--V23 V19--V30
[41] V19--V36 V19--V41 V21--V31 V21--V37 V21--V40 V22--V24 V23--V50 V24--V25
[49] V24--V28 V25--V27 V25--V28 V25--V32 V26--V42 V27--V28 V28--V35 V29--V30
[57] V29--V33 V29--V42 V30--V33 V30--V36 V30--V41 V31--V32 V31--V37 V32--V37
+ ... omitted several edges

[[3]]
IGRAPH 9285b7f UN-- 50 77 -- 
+ attr: name (v/c), smoke (v/n)
+ edges from 9285b7f (vertex names):
 [1] V1 --V10 V1 --V11 V1 --V14 V1 --V41 V2 --V7  V2 --V23 V2 --V26 V3 --V4 
 [9] V3 --V9  V3 --V34 V4 --V32 V4 --V34 V5 --V17 V5 --V32 V6 --V24 V6 --V27
[17] V6 --V28 V7 --V16 V7 --V26 V7 --V42 V7 --V44 V8 --V25 V10--V11 V10--V12
[25] V10--V14 V10--V33 V11--V14 V11--V15 V11--V33 V12--V15 V12--V33 V14--V33
[33] V15--V29 V15--V33 V15--V36 V16--V26 V16--V42 V16--V44 V17--V22 V17--V27
[41] V19--V29 V19--V30 V19--V36 V21--V31 V21--V37 V21--V40 V21--V45 V24--V27
[49] V24--V28 V25--V50 V26--V44 V27--V28 V29--V30 V29--V33 V30--V33 V30--V36
[57] V31--V37 V35--V37 V35--V50 V36--V38 V36--V41 V37--V47 V38--V41 V39--V43
+ ... omitted several edges

The dataset consists of three networks with 50 actors each and a vertex attribute for the smoking behaviour of students. As a first step, we need to create a layout for all three networks. You can basically use any type of layout for each network, but I’d recommend layout_as_dynamic() from the package {{graphlayouts}}. The algorithm calculates a reference layout which is a layout of the union of all networks and individual layouts based on stress minimization and combines those in a linear combination which is controlled by the alpha parameter. For alpha=1, only the reference layout is used and all graphs have the same layout. For alpha=0, the stress layout of each individual graph is used. Values in-between interpolate between the two layouts.

# Try other values for alpha
xy <- layout_as_dynamic(s50, alpha = 0.2)
pList <- vector("list", length(s50))

#static plots
for (i in 1:length(s50)) {
  pList[[i]] <- ggraph(s50[[i]], layout = "manual", x = xy[[i]][, 1], y = xy[[i]][, 2]) +
    geom_edge_link0(edge_width = 0.6, edge_colour = "grey66") +
    geom_node_point(shape = 21, aes(fill = as.factor(smoke)), size = 6) +
    geom_node_text(label = 1:50, repel = FALSE, color = "white", size = 4) +
    scale_fill_manual(
      values = c("forestgreen", "grey25", "firebrick"),
      guide = ifelse(i != 2, "none", "legend"),
      name = "smoking",
      labels = c("never", "occasionally", "regularly")
    ) +
    theme_graph() +
    theme(legend.position = "bottom") +
    labs(title = paste0("Wave ", i))
}
# Reduce("+", pList)
pList[[1]] + pList[[2]] + pList[[3]]

This is nice but of course we want to animate the changes. This is where we say goodbye to ggraph and hello to good-old ggplot2. First, we create a list of data frames for all nodes and add the layout to it.

# create a list which contains all nodes and layout
nodes_lst <- lapply(1:length(s50), function(i) {
  cbind(igraph::as_data_frame(s50[[i]], "vertices"),
    x = xy[[i]][, 1], y = xy[[i]][, 2], frame = i
  )
})

edges_lst <- lapply(1:length(s50), function(i) {
  cbind(igraph::as_data_frame(s50[[i]], "edges"), frame = i)
})

edges_lst <- lapply(1:length(s50), function(i) {
  edges_lst[[i]]$x <- nodes_lst[[i]]$x[match(edges_lst[[i]]$from, nodes_lst[[i]]$name)]
  edges_lst[[i]]$y <- nodes_lst[[i]]$y[match(edges_lst[[i]]$from, nodes_lst[[i]]$name)]
  edges_lst[[i]]$xend <- nodes_lst[[i]]$x[match(edges_lst[[i]]$to, nodes_lst[[i]]$name)]
  edges_lst[[i]]$yend <- nodes_lst[[i]]$y[match(edges_lst[[i]]$to, nodes_lst[[i]]$name)]
  edges_lst[[i]]$id <- paste0(edges_lst[[i]]$from, "-", edges_lst[[i]]$to)
  edges_lst[[i]]$status <- TRUE
  edges_lst[[i]]
})

head(edges_lst[[1]])

  from  to frame         x         y      xend       yend     id status
1   V1 V11     1  1.803046  0.247353  2.123787 -0.7457415 V1-V11   TRUE
2   V1 V14     1  1.803046  0.247353  2.391336  0.2295657 V1-V14   TRUE
3   V2  V7     1  3.623264 -1.313577  3.870197 -1.9488863  V2-V7   TRUE
4   V2 V11     1  3.623264 -1.313577  2.123787 -0.7457415 V2-V11   TRUE
5   V3  V4     1 -4.877254 -2.719843 -3.854924 -2.8766208  V3-V4   TRUE
6   V3  V9     1 -4.877254 -2.719843 -5.416373 -3.4092101  V3-V9   TRUE

We have expanded the edge data frame in a way that also includes the coordinates of the endpoints from the layout that we calculated earlier.

Now we create a helper matrix which includes all edges that are present in any of the networks.

all_edges <- do.call("rbind", lapply(s50, get.edgelist))
all_edges <- all_edges[!duplicated(all_edges), ]
all_edges <- cbind(all_edges, paste0(all_edges[, 1], "-", all_edges[, 2]))

This is used to impute the edges into all networks. So any edge that is not present in time frame two and three gets added to time frame one. But to keep track of these, we set there status to FALSE.

edges_lst <- lapply(1:length(s50), function(i) {
  idx <- which(!all_edges[, 3] %in% edges_lst[[i]]$id)
  if (length(idx != 0)) {
    tmp <- data.frame(from = all_edges[idx, 1], to = all_edges[idx, 2], id = all_edges[idx, 3])
    tmp$x <- nodes_lst[[i]]$x[match(tmp$from, nodes_lst[[i]]$name)]
    tmp$y <- nodes_lst[[i]]$y[match(tmp$from, nodes_lst[[i]]$name)]
    tmp$xend <- nodes_lst[[i]]$x[match(tmp$to, nodes_lst[[i]]$name)]
    tmp$yend <- nodes_lst[[i]]$y[match(tmp$to, nodes_lst[[i]]$name)]
    tmp$frame <- i
    tmp$status <- FALSE
    edges_lst[[i]] <- rbind(edges_lst[[i]], tmp)
  }
  edges_lst[[i]]
})

Why are we doing this? After a lot of experimenting, I came to the conclusion that it is always best to draw all edges, but use zero opacity if status = FALSE. In that way, one gets a smoother transition for edges that (dis)appear. There are probably other workarounds though.

In the last step, we create a data frame out of the lists.

edges_df <- do.call("rbind", edges_lst)
nodes_df <- do.call("rbind", nodes_lst)

head(edges_df)

  from  to frame         x         y      xend       yend     id status
1   V1 V11     1  1.803046  0.247353  2.123787 -0.7457415 V1-V11   TRUE
2   V1 V14     1  1.803046  0.247353  2.391336  0.2295657 V1-V14   TRUE
3   V2  V7     1  3.623264 -1.313577  3.870197 -1.9488863  V2-V7   TRUE
4   V2 V11     1  3.623264 -1.313577  2.123787 -0.7457415 V2-V11   TRUE
5   V3  V4     1 -4.877254 -2.719843 -3.854924 -2.8766208  V3-V4   TRUE
6   V3  V9     1 -4.877254 -2.719843 -5.416373 -3.4092101  V3-V9   TRUE

head(nodes_df)

   name smoke         x         y frame
V1   V1     2  1.803046  0.247353     1
V2   V2     3  3.623264 -1.313577     1
V3   V3     1 -4.877254 -2.719843     1
V4   V4     1 -3.854924 -2.876621     1
V5   V5     1 -2.824439 -3.368968     1
V6   V6     1 -1.529341 -5.292054     1

And that’s it in terms of data wrangling. All that is left is to plot/animate the data.

ggplot() +
  geom_segment(
    data = edges_df,
    aes(x = x, xend = xend, y = y, yend = yend, group = id, alpha = status),
    show.legend = FALSE
  ) +
  geom_point(
    data = nodes_df, aes(x, y, group = name, fill = as.factor(smoke)),
    shape = 21, size = 4, show.legend = FALSE
  ) +
  scale_fill_manual(values = c("forestgreen", "grey25", "firebrick")) +
  scale_alpha_manual(values = c(0, 1)) +
  ease_aes("quadratic-in-out") +
  transition_states(frame, state_length = 0.5, wrap = FALSE) +
  labs(title = "Wave {closest_state}") +
  theme_void()

Interactive plots with visNetwork

library(visNetwork)
data("karate")

visIgraph(karate)

karate_df <- toVisNetworkData(karate)
visNetwork(nodes = karate_df$nodes, 
           edges = karate_df$edges, height = "300px")

Gimmicks

The ggforce package works pretty nicely with ggraph. You can, for instance, use the geom_mark_*() functions to highlight clusters.

library(ggforce)

set.seed(665)

#create network with a group structure
g <- sample_islands(9, 40, 0.4, 15)
g <- simplify(g)
V(g)$grp <- as.character(rep(1:9, each = 40))

ggraph(g, layout = "backbone", keep = 0.4) +
  geom_edge_link0(edge_color = "grey66", edge_width = 0.2) +
  geom_node_point(aes(fill = grp), shape = 21, size = 3) +
  geom_mark_hull(
    aes(x, y, group = grp, fill = grp),
    concavity = 4,
    expand = unit(2, "mm"),
    alpha = 0.25
  ) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  theme_graph()+
  theme(legend.position = "none")

Of course you can also add a label to your clusters.

ggraph(g, layout = "backbone", keep = 0.4) +
  geom_edge_link0(edge_color = "grey66", edge_width = 0.2) +
  geom_node_point(aes(fill = grp), shape = 21, size = 3) +
  geom_mark_hull(
    aes(x, y, group = grp, fill = grp, label=grp),
    concavity = 4,
    expand = unit(2, "mm"),
    alpha = 0.25
  ) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  theme_graph()+
  theme(legend.position = "none")

“How can I achieve that my directed edges stop at the node border, independent from the node size?”

Out of the box you will probably end up with something like this

# create a random network
set.seed(1071)
g <- sample_pa(30, 1)
V(g)$degree <- degree(g, mode = "in")

ggraph(g, "stress") +
  geom_edge_link(
    aes(end_cap = circle(node2.degree + 2, "pt")),
    edge_colour = "black",
    arrow = arrow(
      angle = 10,
      length = unit(0.15, "inches"),
      ends = "last",
      type = "closed"
    )
  ) +
  geom_node_point(aes(size = degree), col = "grey66", show.legend = FALSE) +
  scale_size(range = c(3, 11)) +
  theme_graph()

The overlap can be avoided by using the I() function from base R, which treats the entries of a vector “as is”. So we know that if a node has degree 5, it will be mapped to a circle with radius (or diameter?) “5pt”. Since this means, that you have no control over the scaling, you need to do that beforehand.

normalise <- function(x, from = range(x), to = c(0, 1)) {
  x <- (x - from[1]) / (from[2] - from[1])
  if (!identical(to, c(0, 1))) {
    x <- x * (to[2] - to[1]) + to[1]
  }
  x
}

# map to the range you want
V(g)$degree <- normalise(V(g)$degree, to = c(3, 11))

ggraph(g, "stress") +
  geom_edge_link(
    aes(end_cap = circle(node2.degree + 2, "pt")),
    edge_colour = "grey25",
    arrow = arrow(
      angle = 10,
      length = unit(0.15, "inches"),
      ends = "last",
      type = "closed"
    )
  ) +
  geom_node_point(aes(size = I(degree)), col = "grey66") +
  theme_graph()

“How can I lower the opacity of nodes without making edges visible underneath?”

One of the rules you should try to follow is that edges should not be visible on top of nodes. Usually that is easy to achieve by drawing the edges before the nodes. But if you want to lower the opacity of nodes, they do become visible again.

g <- sample_gnp(20, 0.5)
V(g)$degree <- degree(g)

ggraph(g, "stress") +
  geom_edge_link(edge_colour = "grey66") +
  geom_node_point(
    size = 8,
    aes(alpha = degree),
    col = "red",
    show.legend = FALSE
  ) +
  theme_graph()

The solution is rather simple. Just add a node layer with the same aesthetics below with alpha=1 (default) and color="white" (or the background color of the plot).

ggraph(g, "stress") +
  geom_edge_link(edge_colour = "grey66") +
  geom_node_point(size = 8, col = "white") +
  geom_node_point(
    aes(alpha = degree),
    size = 8,
    col = "red",
    show.legend = FALSE
  ) +
  theme_graph()

Of course you could also use start_cap and end_cap here, but you may have to fiddle again as in the last example.

“How can I enhance readability of node labels in hairball graphs?”

Sometimes it is really hard to make labels readable when the network is very cluttered

g <- sample_gnp(50, 0.7)
V(g)$name <- sapply(1:50, function(x) paste0(sample(LETTERS, 4), collapse = ""))
E(g)$weight <- runif(ecount(g))

ggraph(g) +
  geom_edge_link0(aes(edge_color = weight, edge_width = weight), show.legend = FALSE) +
  geom_node_point(size = 8, color = "#44a6c6") +
  geom_node_text(aes(label = name), fontface = "bold") +
  scale_edge_color_continuous(low = "grey66", high = "black") +
  scale_edge_width(range = c(0.1, 0.5)) +
  theme_graph() +
  coord_fixed()

Using "stress" as default layout

Here you can make use of the fact that the layout of the nodes are stored in a “hidden” data frame when a ggraph object is constructed (this is what we made use of with geom_mark_hull() too). That means you can use other geoms from other packages. In this case, the shadowtext package as shown below.

ggraph(g,"stress") +
  geom_edge_link0(aes(edge_color = weight, edge_width = weight), show.legend = FALSE) +
  geom_node_point(size = 8, color = "#44a6c6") +
  shadowtext::geom_shadowtext(aes(x, y, label = name), color = "black", size = 4, bg.colour = "white") +
  scale_edge_color_continuous(low = "grey66", high = "black") +
  scale_edge_width(range = c(0.1, 0.5)) +
  theme_graph() +
  coord_fixed()