TP-GMM-Annealing
Posted on Thu 23 January 2020 in posts
Annealing and GMM¶
In this notebook we implement a deterministic-annealing procedure for a GMM with many centers. Only the centers are adjusted; the inverse temperature $\beta$ plays the role of the inverse variance shared by all centers.
In [1]:
import numpy as np
import matplotlib.pyplot as pp
In [2]:
# Generate synthetic clusters: Nc unit-variance Gaussian blobs whose
# centers are drawn uniformly at random inside the cube [-L, L]^D.
D= 2 # dimension of the data
M= 200 # number of data points
L = 10 # half-width of the cube
Nc = 3 # number of clusters
# NOTE: the original called np.random.uniform(2*L, size=D) - L, which sets
# low=2*L and high=1.0, sampling from (1-L, L] instead of the intended
# symmetric cube; fixed by sampling uniform(-L, L) directly.
centers = np.array([])
for c in range(Nc):
    m = np.random.uniform(-L, L, size=(D))
    centers = np.append(centers,[m])
centers = centers.reshape(Nc,D)
# assign each of the M points a cluster label uniformly at random
ch = [c for c in range(Nc)]
data_lab = np.random.choice(ch,size=(M))
data = np.zeros((D,M))
for i in range(Nc):
    idx = np.where(data_lab==i)
    # points of cluster i = its center + isotropic unit-variance noise
    data[:,idx[0]] = centers[i].reshape(D,1) + np.random.normal(size=(D,len(idx[0])))
In [3]:
# Center both the data cloud and the true centers at the origin.
data = data - data.mean(axis=1, keepdims=True)
centers = centers - centers.mean(axis=0, keepdims=True)
In [4]:
# Plot the centered data cloud together with the true cluster centers.
xs, ys = data
pp.scatter(xs, ys)
cx, cy = centers.T
pp.scatter(cx, cy)
Out[4]:
In [5]:
def initMeans(K,L,D):
    """Draw K initial cluster means uniformly in the cube [-L, L]^D.

    Returns a (D, K) array: one column per mean, the layout expected by
    ComputeResp / MoveMean (which address means as μ[:, c]).

    Fixes vs the original:
    - np.random.uniform(2*L, size=D) - L set low=2*L, high=1.0, giving the
      skewed range (1-L, L]; we sample uniform(-L, L) directly instead.
    - reshape(D, K) interleaved coordinates across means (the flat buffer is
      laid out mean-by-mean); reshape(K, D) then transpose keeps each mean's
      coordinates together.
    """
    μ = np.random.uniform(-L, L, size=(K, D))
    return μ.T
In [6]:
def ComputeResp(β,μ,X):
    """Responsibilities P[i, c] ∝ exp(-β ||x_i - μ_c||²), rows normalized.

    β : inverse temperature (scalar > 0)
    μ : (D, K) array of cluster means
    X : (D, M) array of data points
    returns an (M, K) row-stochastic responsibility matrix.

    Uses the log-sum-exp trick: the original exponentiated raw squared
    distances, which underflows to an all-zero row at large β and yields
    NaN after normalization; subtracting the per-row maximum guarantees
    at least one term equals 1.
    """
    diff = X[:, :, None] - μ[:, None, :]          # (D, M, K)
    logits = -β * np.sum(diff ** 2, axis=0)       # (M, K) log-weights
    logits -= logits.max(axis=1, keepdims=True)   # stabilize before exp
    Resp = np.exp(logits)
    Resp /= Resp.sum(axis=1, keepdims=True)
    return Resp
# Smoke test: initialize 3 random means in the cube and compute the
# responsibilities for the data at inverse temperature β = 1.
μ = initMeans(3,L,2)
P = ComputeResp(1,μ,data)
In [7]:
def MoveMean(X,μ,P):
    """M-step: return the responsibility-weighted means, shape (D, K).

    X : (D, M) data points
    μ : (D, K) current means — unused, kept for interface compatibility
    P : (M, K) responsibilities

    New mean c = Σ_i P[i, c] x_i / Σ_i P[i, c].  The original built a K×K
    diagonal matrix and performed a second matmul; dividing by the column
    sums broadcasts the same per-column scaling without the O(K²) temporary.
    """
    return np.matmul(X, P) / np.sum(P, 0)
In [8]:
def ComputeFreeEne(β,X,μ):
    """Return Σ_i log Σ_c exp(-β ||x_i - μ_c||²) (log-partition over data).

    β : inverse temperature, X : (D, M) data, μ : (D, K) means.
    The caller divides the result by β to track the free energy.

    The original summed the raw exponentials, which underflow to zero at
    large β and produce log(0) = -inf; the log-sum-exp trick keeps the
    computation finite for any β.
    """
    diff = X[:, :, None] - μ[:, None, :]       # (D, M, K)
    a = -β * np.sum(diff ** 2, axis=0)         # (M, K) log-weights
    m = a.max(axis=1, keepdims=True)           # per-point stabilizer
    return np.sum(m[:, 0] + np.log(np.sum(np.exp(a - m), axis=1)))
def ComputeEner(X,P,μ,K):
    """Mean responsibility-weighted squared distance (energy per point).

    X : (D, M) data, P : (M, K) responsibilities, μ : (D, K) means,
    K : number of clusters (kept for interface compatibility).

    With E[c, i] = ||x_i - μ_c||², returns (1/M) Σ_i Σ_c P[i, c] E[c, i].
    The original formed the full M×M product P @ E only to read its
    diagonal; summing the elementwise product P * E.T gives the same
    value in O(MK) instead of O(M²K).
    """
    diff = X[:, :, None] - μ[:, None, :]     # (D, M, K)
    sq = np.sum(diff ** 2, axis=0)           # (M, K): sq[i, c] = E[c, i]
    return np.sum(P * sq) / X.shape[1]
First, we compute the threshold at which the instability develops. This is given by $1/(2\lambda)$ where $\lambda$ is the highest eigenvalue of the covariance matrix
In [9]:
# Critical inverse temperatures β = 1/(2λ), one per eigenvalue λ of the
# sample covariance; the first split happens at the smallest of these.
# NOTE(review): np.cov(data) is symmetric, so eigvalsh would be the more
# robust call — eigvals may return a complex dtype; confirm before changing,
# since eigvalsh also sorts the eigenvalues (affects autov[1] below).
autov = 1/(2*np.linalg.eigvals(np.cov(data)))
print(autov[1])
The code below needs to be adjusted depending on the properties of the clusters.
In [14]:
# Annealing sweep over the inverse temperature β: at each β, perturb the
# means and run 20 EM-style updates, recording the energy and free energy.
K=6
μ = initMeans(K,L,D)
f,ax = pp.subplots(1,3,figsize=(20,10))
ax[0].scatter(data[0,:],data[1,:])
betas, energies, free_energies = [], [], []
for β in np.arange(0.005,0.1,0.001):
    # small random kick so coinciding means can split once a mode turns unstable
    μ = μ + 0.2*np.random.normal(size=(μ.shape))
    for _ in range(20):
        P = ComputeResp(β,μ,data)
        μ = MoveMean(data,μ,P)
    ax[0].scatter(μ[0,:],μ[1,:],s=100, c='red')
    energies.append(ComputeEner(data,P,μ,K))
    free_energies.append(ComputeFreeEne(β,data,μ)/β)
    betas.append(β)
Allβ = np.asarray(betas)
Ener = np.asarray(energies)
FEner = np.asarray(free_energies)
ax[1].plot(Allβ,Ener)
# vertical marker at the predicted first splitting threshold min 1/(2λ)
ax[1].plot([np.min(autov),np.min(autov)],[0,Ener[0]])
ax[2].plot(Allβ,FEner)
Out[14]:
Let's see if we can guess the second threshold
In [15]:
# Overlay the final means on the three labelled data clouds.
for lab in range(3):
    mask = data_lab == lab
    pp.scatter(data[0, mask], data[1, mask])
pp.scatter(μ[0,:],μ[1,:])
Out[15]:
In [16]:
# We select the correct clusters
# Labels of the two clusters whose merging/splitting threshold we want to
# predict; adjust these after inspecting the scatter plot above.
lab1 = 1
lab2 = 2
In [17]:
# Restrict the data to the two selected clusters and compute the candidate
# thresholds 1/(2λ) from the spectrum of the truncated covariance.
mask1 = data_lab == lab1
mask2 = data_lab == lab2
data_trunc = np.hstack((data[:, mask1], data[:, mask2]))
pp.scatter(data_trunc[0,:],data_trunc[1,:])
autov2 = 1/(2*np.linalg.eigvals(np.cov(data_trunc)))
In [20]:
# Display the candidate thresholds for the truncated (two-cluster) data.
autov2
Out[20]:
In [25]:
# Repeat the annealing sweep, now overlaying both predicted thresholds:
# min 1/(2λ) from the full covariance and from the truncated one.
K=6
μ = initMeans(K,L,D)
f,ax = pp.subplots(1,3,figsize=(20,10))
ax[0].scatter(data[0,:],data[1,:])
betas, energies, free_energies = [], [], []
for β in np.arange(0.005,0.1,0.001):
    # random kick lets coinciding means separate once a mode turns unstable
    μ = μ + 0.2*np.random.normal(size=(μ.shape))
    for _ in range(20):
        P = ComputeResp(β,μ,data)
        μ = MoveMean(data,μ,P)
    ax[0].scatter(μ[0,:],μ[1,:],s=100, c='red')
    energies.append(ComputeEner(data,P,μ,K))
    free_energies.append(ComputeFreeEne(β,data,μ)/β)
    betas.append(β)
Allβ = np.asarray(betas)
Ener = np.asarray(energies)
FEner = np.asarray(free_energies)
ax[1].plot(Allβ,Ener)
ax[1].plot([np.min(autov),np.min(autov)],[0,Ener[0]])
ax[1].plot([np.min(autov2),np.min(autov2)],[0,Ener[0]])
ax[2].plot(Allβ,FEner)
Out[25]:
In [ ]: