TP-ComDec
Posted on Thu 23 January 2020 in posts
TP Network and Community Detection¶
the goal here is to implement different methods for partitionning a graph into many clusters. We will first focus on the graph Laplacian, and then on implementing Belief Propagation.
Spectral Clustering¶
The first part is to construct a fonction generating random graph with blocks (stochastic block model) in the simplest manner. Then we will look at some properties using the graph spectrum.
- Make a function to build a graph with a $p_{ab}$ matrix of probability and giving the probability to be in a community $n_a$ (in order to simplify the procedure, it is possible to generate the graph with $N_a = n_a N$ nodes in each community such that they will be already in blocks in the adjacency matrix). You can visualize your graph by using imshow on the adjacency matrix, or you can write a csv file 'node1 node2' is there is an edge between node1 and node2, and list one edge by line. Then it is possible to use gephy to have a representation of your graph.
- How to find isolated nodes ?, check that you do not have a small disconnected part.
- Check that you obtain the correct degree with the formula
- Take an easy case fixing $p_{in}$ and $p_{out}$ has the probability to have an edge within or betwewen clusters and fixing the ratio $\epsilon = p_{out}/p_{in}$ to a small (easy) value. Check that for different number of communities (q=2,3,4) you can recover the clusters. It can be intersting to plot the scatter plot over the useful eigenvectors.
- varying the ratio $\epsilon = p_{out}/p_{in}$, try to see whenever it is possible to recover the communities or not (take $q=2$ for simplicity).
In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
In [27]:
def BuildAdj(qi,p_ab,N):
A = np.zeros((N,N),int)
Num_pairs = int(N*(N-1)/2)
idx = 0
rdm_numb = np.random.random(Num_pairs)
for i in range(N):
for j in np.arange(i+1,N):
if(rdm_numb[idx]<p_ab[qi[i],qi[j]]):
A[i,j] = 1
A[j,i] = 1
idx += 1
return A
def BuildGraphSym(q,N,c,ϵ):
na = np.ones(q)/q
Na = np.zeros(q,int)
cum = 0
for i in range(q-1):
Na[i] = int(N*na[i])
cum += Na[i]
Na[q-1] = N-cum
# print(Na)
qi = np.zeros(N,int)
cum = 0
for i in np.arange(0,q-1):
qi[cum+Na[i]:] += 1
cum += Na[i]
cin = q*c/(1+ϵ*(q-1))
print("cin=",cin)
cout = ϵ*cin
print("cout=",cout)
p_ab = np.ones((q,q))*cout/N
for i in range(q):
p_ab[i,i] = cin/N
A = BuildAdj(qi,p_ab,N)
return p_ab,qi,Na,A
c=3
q=3
N=100
ϵ=0.05
pab,qi,Na,A = BuildGraphSym(q,N,c,ϵ)
plt.imshow(A)
print("Threshold=",(np.sqrt(c)-1)/(np.sqrt(c)+q-1))
In [28]:
G = nx.from_numpy_matrix(A)
nx.draw(G,pos=nx.spring_layout(G,k=0.1,scale=10))
In [29]:
# Let's check for disconnected components
# isolated nodes
print(np.where(np.sum(A,1)==0))
print(np.where(np.sum(A,1)==0)[0].shape)
In [30]:
# Define de Laplacian
D = np.diag(np.sum(A,1))
L = D-A
print("Nb of disjoint compontents=",np.where(np.linalg.eigvalsh(L)<1e-8)[0].shape)
In [32]:
v = np.zeros(N)
v[2] = 1
for it in range(200):
v = np.matmul(L,v)
v = v/np.linalg.norm(v)
# Disconnected pieces
Disc_part = np.where(v==0)
print(Disc_part)
print(Disc_part[0].shape)
In [62]:
c=12
q=3
N=500
ϵ=0.01
pab,qi,Na,A = BuildGraphSym(q,N,c,ϵ)
plt.imshow(A)
print("Threshold=",(np.sqrt(c)-1)/(np.sqrt(c)+q-1))
In [58]:
# find the eigenvalues
D = np.diag(np.sum(A,1))
L = D-A
Eval,Evec = np.linalg.eigh(L)
In [59]:
# show eigenvalues
plt.plot(Eval)
Out[59]:
In [60]:
# Scatter plot of the eigenvalues
f,ax = plt.subplots(1,5,figsize=(20,5))
ax[0].plot(Eval[:10])
ax[1].plot(Evec[:,1])
ax[2].plot(Evec[:,2])
ax[4].plot(Evec[:,3])
for i in range(q):
idq = np.where(qi==i)[0]
ax[3].scatter(Evec[idq,1],Evec[idq,2])
In [71]:
# we change and see that we cannot find the communities
ϵ=0.2
pab,qi,Na,A = BuildGraphSym(q,N,c,ϵ)
# find the eigenvalues
D = np.diag(np.sum(A,1))
L = D-A
# Scatter plot of the eigenvalues
Eval,Evec = np.linalg.eigh(L)
f,ax = plt.subplots(1,5,figsize=(20,5))
ax[0].plot(Eval[:10])
ax[1].plot(Evec[:,1])
ax[2].plot(Evec[:,2])
ax[4].plot(Evec[:,3])
for i in range(q):
idq = np.where(qi==i)[0]
ax[3].scatter(Evec[idq,1],Evec[idq,2])
Belief Propagation¶
BP is made of a series of simple updates on local messages that lie between two sites
- The first step to prepare the algorithm is to make array with a list of neighbors for each site. Hence an array where in the cell $i$, it contains the list of all its neighbors
- We also need to invert this list. Take for instance for the site $i$. We need to know the index at which the site $i$ is localized in the list of neighbors of its neighbors:
- Once those list are made, you can compute the iterative equations
and
$$ h^i (t_i) = \frac{1}{N} \sum_k \sum_{t_k} c_{t_k t_i} \psi^k(t_k) $$In [77]:
from itertools import permutations
In [96]:
def getNN(A,N):
NN = []
for i in range(N):
NN.append(np.where(A[:,i]!=0)[0])
return NN
def getIdN(NN,N):
IdN = []
for i in range(N):
idd = []
for id_j in NN[i]:
num_i = np.where(NN[id_j]==i)[0][0]
#print(i," ",num_i," ",id_j)
idd.append(num_i)
IdN.append(idd)
return IdN
In [120]:
# Rdm init for ψ
def InitΨ(N,NN,q):
Ψ = []
for i in range(N):
rdm_numb = np.random.random((len(NN[i]),q))
rdm_numb /= np.sum(rdm_numb,1).reshape((rdm_numb.shape[0],1))
Ψ.append(rdm_numb)
return Ψ
def InitΨi(N,q):
Ψi = np.zeros((N,q))
for i in range(N):
rdm_numb = np.random.random(q)
rdm_numb /= np.sum(rdm_numb)
Ψi[i,:]=(rdm_numb)
return Ψi
# update i -> j
def UpdΨ(i,j,id_i,Ψ,h,na,cab,NN,IdN,q,dump=0.5):
new_msg = np.ones(q)
for c in range(q):
for k in range(len(NN[i])):
if NN[i][k] != j:
new_msg[c] *= np.matmul(Ψ[i][k,:],cab[:,c])
new_msg[c] *= na[c]*np.exp(-h[c])
return new_msg / new_msg.sum()
def AllΨ(Ψ,Ψi,h,na,cab,N,NN,IdN,q):
for i in np.random.permutation(N):
new_msg = np.zeros((len(NN[i]),q))
# compute the new outgoing messages from i Ψ^{i->j}
for j in range(len(NN[i])):
new_msg[j,:] = UpdΨ(i,NN[i][j],IdN[i][j],Ψ,h,na,cab,NN,IdN,q)
# remove the contribution from Ψ^i
h -= 1/N*np.matmul(cab,Ψi[i,:])
# update the messages and marginals
for j in range(len(NN[i])):
Ψ[NN[i][j]][IdN[i][j],:] = new_msg[j,:]
Ψi[i,:] = ComputeMarg(i,Ψ,h,na,cab,NN,q)
h += 1/N*np.matmul(cab,Ψi[i,:])
def Updh(Ψi,N):
return np.sum(np.matmul(Ψi,cab),0)/N
def ComputeMarg(i,Ψ,h,na,cab,NN,q):
Ψi = np.ones(q)
for c in range(q):
for k in range(len(NN[i])):
Ψi[c] *= np.matmul(Ψ[i][k,:],cab[:,c])
# Ψi[c] *= (Ψ[i][k,:]*cab[c,:]).sum()
Ψi[c] *= na[c]*np.exp(-h[c])
return Ψi / Ψi.sum()
In [98]:
def RunBP(N,NN,IdN,q,na,cab,tmax=10):
Ψ = InitΨ(N,NN,q)
Ψi = InitΨi(N,q)
h = Updh(Ψi,N)
for t in range(tmax):
AllΨ(Ψ,Ψi,h,na,cab,N,NN,IdN,q)
Ψi = np.zeros((N,q))
for i in range(N):
Ψi[i,:] = ComputeMarg(i,Ψ,h,na,cab,NN,q)
return Ψi
In [251]:
c=12
q=3
N=1000
OvTrue = np.array([])
for ϵ in np.arange(0,0.6,0.025):
pab,qi,Na,A = BuildGraphSym(q,N,c,ϵ)
cab = pab*N
NN = getNN(A,N)
IdN = getIdN(NN,N)
Ψi = RunBP(N,NN,IdN,q,Na/N,cab)
perm = permutations([0, 1, 2])
# Print the obtained permutations
Ov = []
for p in list(perm):
Ov_tmp = 0
for i in range(N):
Ov_tmp += Ψi[i,p[qi[i]]]
Ov.append(Ov_tmp/N)
print(np.max(Ov))
OvTrue = np.append(OvTrue,np.max(Ov))
In [253]:
plt.plot(np.arange(0,0.6,0.025),(OvTrue-0.333)/(1-0.333))
Out[253]:
In [ ]: