Commit c2f576f0 authored by MGerlach's avatar MGerlach
Browse files

added some info to description

parent 1c2abdbe
%% Cell type:markdown id: tags:
# Calculating different network metrics
Networkx:
- [x] N, E: Number of nodes/edges in the network N
- [x] r: Density
- [x] k: avg degree
- [x] C: Clustering coefficient
- [x] geff: global efficiency
- [x] cpl: Characteristic path length (largest connected component)
- [x] geff: global efficiency
- [x] L: Characteristic path length (largest connected component)
- [ ] Phi: small-world propensity
- networkx contains code for a quantity called [sigma](https://networkx.org/documentation/networkx-2.2/reference/algorithms/generated/networkx.algorithms.smallworld.sigma.html)
- this is different than the small-world propensity described, e.g., in
BCT
- [x] cps: core-periphery structure
Graph-tool
- [x] mdl: Minimum Description Length
- [x] modq: Modularity score of the partition on the lowest level in the hierarchy
Ripser:
- [x] homology: what is the metric?
- [x] persistent homology: number of 0-, 1-, and 2-cycles
- [?] compression: use description length?
- [x] mechanical features
- [x] mechanical features: d, DoF_C
General:
- [ ] All observables for randomized versions
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Load packages
%% Cell type:code id: tags:
``` python
import os, sys
import json
import pickle
from sqlitedict import SqliteDict
import json
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import graph_tool.all as gt
import bct
from ripser import ripser
import utils_network
import utils_networkx
import utils_gt
# import utils_ripser
import utils_filtration_metrics
%load_ext autoreload
%autoreload 2
```
%% Cell type:markdown id: tags:
# Load datasets
%% Cell type:code id: tags:
``` python
# Snapshot label and wiki identifier; both are interpolated into the data file names below.
snapshot = "2022-03"
wiki_db = "enwiki"
# Storage backend used when loading the links table (exactly one line should be active):
# mode = "pickle" # for bulk access
mode = "sqlite" # for individual access
```
%% Cell type:code id: tags:
``` python
## Load a links table of the form {page_id: pageids of outlinks }
# The path keeps a "{0}" placeholder for the file extension, which is filled
# in below depending on the storage backend selected by `mode`.
FNAME_read = "/home/mgerlach/REPOS/curios-critical-readers/data/pages-links_%s_%s.{0}"%(wiki_db,snapshot)
if mode == "pickle":
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(FNAME_read.format("pkl"),"rb") as fin:
        dict_links = pickle.load(fin)
elif mode == "sqlite":
    dict_links = SqliteDict(FNAME_read.format("sqlite"))
else:
    # unrecognised mode: fall back to an empty links table
    dict_links = {}
# number of entries in the loaded table
print(len(dict_links))
```
%% Cell type:code id: tags:
``` python
# reading sessions subsample
# The file is read line by line; each line is parsed as one JSON object and
# its "session" entry (empty list if the key is missing) is collected.
FNAME_read = "/home/mgerlach/REPOS/curios-critical-readers/data/sessions-app_%s_%s_small.json"%(wiki_db,snapshot)
list_sessions = []
with open(FNAME_read) as fin:
    for line in fin:
        json_in = json.loads(line)
        session = json_in.get("session",[])
        list_sessions.append(session)
```
%% Cell type:code id: tags:
``` python
# select a single reading session
session = list_sessions[22]
print("Length of session: ",len(session))
# preview: print the pages in order, stopping after the one whose "pos" is 5
for page in session:
    print(page)
    if page["pos"] == 5:
        break
```
%% Cell type:markdown id: tags:
# Preparing the networks
%% Cell type:code id: tags:
``` python
# we want an undirected network
# (this flag is passed as `directed=` to the edge-list, graph-construction and rewiring helpers)
directed = False
```
%% Cell type:code id: tags:
``` python
# list of nodes/edges from hyperlinks between articles
# inputs: the selected reading session and the links table; `directed` is forwarded unchanged
list_nodes, list_edges = utils_network.session2edgelist_links(session,dict_links, directed = directed)
```
%% Cell type:code id: tags:
``` python
# graph-object networkx
# built from the node and edge lists; this object feeds the networkx metrics further down
g_nx = utils_networkx.make_graph_links(list_nodes, list_edges, directed=directed)
print(g_nx)
```
%% Cell type:code id: tags:
``` python
# graph-object graph-tool
# same node and edge lists as the networkx graph; used for the blockmodel fit and modularity
g_gt = utils_gt.make_graph_links(list_nodes, list_edges, directed = directed)
print(g_gt)
```
%% Cell type:code id: tags:
``` python
# Dense adjacency matrix of the networkx graph (input to the BCT routine below)
A = nx.to_numpy_array(g_nx)
print(A)
```
%% Cell type:code id: tags:
``` python
# rewired edgelist (degree-preserving) in case we want to compare with a null model
# NOTE(review): the result is not used by any cell visible in this notebook view
list_edges_rewired = utils_network.rewire_edges(list_edges, directed = directed)
```
%% Cell type:markdown id: tags:
# Networkx
%% Cell type:code id: tags:
``` python
# network size: node count N and edge count E
N = g_nx.number_of_nodes()
E = g_nx.number_of_edges()
print("Nodes: ", N)
print("Edges: ", E)
```
%% Cell type:code id: tags:
``` python
# mean of the per-node degrees
k = np.mean([deg for _node, deg in g_nx.degree()])
print("Average degree: ", k)
```
%% Cell type:code id: tags:
``` python
# density
# computed by networkx on the full graph g_nx
r = nx.density(g_nx)
print("Density: ", r)
```
%% Cell type:code id: tags:
``` python
# clustering
# average of the per-node clustering coefficients, as computed by networkx
C = nx.average_clustering(g_nx)
print("Clustering coefficient: ", C)
```
%% Cell type:code id: tags:
``` python
# global efficiency
# computed by networkx on the full graph (not restricted to the largest component)
geff = nx.global_efficiency(g_nx)
print("Global efficiency: ", geff)
```
%% Cell type:code id: tags:
``` python
# Characteristic path length
# Restrict to the largest connected component first: the average shortest
# path length is only evaluated on that subgraph.
g_nx_conmax = g_nx.subgraph(max(nx.connected_components(g_nx), key=len))
cpl = nx.average_shortest_path_length(g_nx_conmax)
print("Characteristic Path Length (largest connected component): ", cpl)
```
%% Cell type:code id: tags:
``` python
# small-world property
# sigma = nx.sigma(g_nx_conmax)
# print("Small-world property: ", sigma)
```
%% Cell type:code id: tags:
``` python
# small-world propensity
# TODO
```
%% Cell type:markdown id: tags:
# BCT
%% Cell type:code id: tags:
``` python
# core-periphery structure
# The call returns an indexable result; element 1 is reported as the score.
# NOTE(review): presumably element 0 is the core/periphery assignment and
# element 1 the core-ness statistic -- confirm against the bctpy documentation.
cps_result = bct.core_periphery_dir(A)
cps = cps_result[1]
print("Core-periphery structure: ", cps)
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Graph-tool
%% Cell type:code id: tags:
``` python
# fit a hierarchical blockmodel
state = gt.minimize_nested_blockmodel_dl(g_gt)
# do a few swaps to find minimum
# (100 rounds of 10 sweeps each; beta=np.inf is presumably the greedy,
# zero-temperature setting -- confirm against the graph-tool docs)
for i in range(100):
    ret = state.multiflip_mcmc_sweep(niter=10, beta=np.inf)
# draw the fitted hierarchy once, after the refinement loop
state.draw()
```
%% Cell type:code id: tags:
``` python
# description length
# taken as the entropy of the fitted nested blockmodel state
mdl = state.entropy()
print("Minimum Description Length :", mdl)
```
%% Cell type:code id: tags:
``` python
# modularity of partition on the lowest level
# l selects the hierarchy level that is projected; 0 is used here
l = 0
blocks = state.project_level(l).get_blocks()
modq = gt.modularity(g_gt,blocks)
print("Modularity score (partition on the lowest level): ", modq)
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Filtration metrics
%% Cell type:markdown id: tags:
### Persistent homology
%% Cell type:code id: tags:
``` python
# dense adjacency matrix of the networkx graph as a plain ndarray
G = nx.adjacency_matrix(g_nx).toarray()
# symmetrise by adding the transpose ...
G = G + G.T
# ... then clip every positive entry to 1 so the matrix is binary
G[G > 0] = 1
# make a filtration matrix from adjacency matrix
weighted_G = utils_filtration_metrics.make_filtration_matrix(G)
```
%% Cell type:code id: tags:
``` python
# Compute persistent homology
bars_orig = utils_filtration_metrics.get_barcode(weighted_G)
bettis_orig = utils_filtration_metrics.betti_curves(bars_orig, weighted_G.shape[0])
```
%% Cell type:code id: tags:
``` python
# plot the barcode; the second argument is the number of rows of the filtration matrix
utils_filtration_metrics.plot_barcode(bars_orig, weighted_G.shape[0])
```
%% Cell type:code id: tags:
``` python
# entry 0 of bettis_orig against the index 0 .. G.shape[0]-1
plt.plot(np.arange(G.shape[0]), bettis_orig[0])
plt.xlabel('Nodes')
plt.ylabel('Number of 0-Cycles')
```
%% Cell type:code id: tags:
``` python
# entry 1 of bettis_orig against the index 0 .. G.shape[0]-1
plt.plot(np.arange(G.shape[0]), bettis_orig[1])
plt.xlabel('Nodes')
plt.ylabel('Number of 1-Cycles')
```
%% Cell type:code id: tags:
``` python
# entry 2 of bettis_orig against the index 0 .. G.shape[0]-1
plt.plot(np.arange(G.shape[0]), bettis_orig[2])
plt.xlabel('Nodes')
plt.ylabel('Number of 2-Cycles')
```
%% Cell type:markdown id: tags:
### Mechanical features
%% Cell type:code id: tags:
``` python
# mechanical features from the binary adjacency matrix: the helper returns two values (d, conform)
d, conform = utils_filtration_metrics.compute_mechanical_features(G)
```
%% Cell type:code id: tags:
``` python
# first mechanical feature (d) against the index 0 .. G.shape[0]-1
plt.plot(np.arange(G.shape[0]), d)
plt.xlabel('Nodes')
plt.ylabel('d')
```
%% Cell type:code id: tags:
``` python
# second mechanical feature (conform, labelled DoF_C) against the index 0 .. G.shape[0]-1
plt.plot(np.arange(G.shape[0]), conform)
plt.xlabel('Nodes')
plt.ylabel('DoF_C')
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment