TP1-Correction

Posted on Sat 16 February 2019 in posts

In [1]:
# import
import numpy as np
import matplotlib.pyplot as pp
In [2]:
# Draw 100,000 i.i.d. uniform samples on [0, 1)
# (random_sample is the canonical name; np.random.random is its alias)
va = np.random.random_sample(100000)
In [3]:
# build a histogram by hand
def count_elements(seq, BInf, BSup, NBins):
    """Histogram the values of `seq` over [BInf, BSup) with NBins equal bins.

    Parameters
    ----------
    seq : iterable of float
        Sample values; every value must lie in [BInf, BSup).
    BInf, BSup : float
        Lower / upper bound of the histogram range.
    NBins : int
        Number of equal-width bins.

    Returns
    -------
    (hist, norm) : (np.ndarray, float)
        Raw counts per bin, and the normalization factor
        sum(hist) * bin_width, so that hist / norm is a density estimate.

    Raises
    ------
    ValueError
        If a value falls outside [BInf, BSup).
    """
    δ = (BSup - BInf) / NBins
    hist = np.zeros(NBins)
    for x in seq:
        idx = int(np.floor((x - BInf) / δ))
        if idx < 0 or idx >= NBins:
            # was: print("range error!"); break — which silently returned a
            # partially-filled (i.e. wrong) histogram; fail loudly instead
            raise ValueError(
                f"value {x} outside histogram range [{BInf}, {BSup})")
        hist[idx] += 1
    # norm = total count * bin width (vectorized; was an explicit loop)
    norm = hist.sum() * δ
    return hist, norm

# histogram of the uniform random variables
# (h1 holds raw counts; h1/norm is the density estimate, which should sit
# near 1 everywhere for U[0, 1])
h1,norm = count_elements(va,0,1,100)
print(h1)
print(h1/norm)
# NOTE(review): pp.plot(0) / pp.plot(2) each draw a single point at x=0;
# presumably y=0 and y=2 reference lines were intended (pp.axhline(0) /
# pp.axhline(2) would do that) — confirm intent.
pp.plot(0)
pp.plot(2)
pp.plot(h1/norm)
[ 971. 1004. 1007.  950.  988.  960. 1067.  944. 1057.  994. 1046. 1000.
 1034.  979.  986.  964.  994. 1037. 1015. 1044.  996.  983. 1052. 1014.
 1054.  987.  971.  954. 1004.  985. 1005. 1020.  981.  953. 1043.  982.
  998. 1019. 1004. 1004.  971.  971.  965. 1050.  952.  963. 1048. 1005.
  987. 1012. 1025. 1017. 1009. 1035.  972.  915. 1032. 1075.  916.  950.
  989. 1027.  975.  971. 1022. 1053. 1018.  949. 1006.  979. 1092.  997.
  979. 1017. 1011.  994.  994.  967.  994. 1007.  999.  980. 1026.  955.
  954.  964. 1073.  997. 1038. 1007. 1025. 1033.  961. 1028. 1010. 1008.
  989. 1014. 1014.  964.]
[0.971 1.004 1.007 0.95  0.988 0.96  1.067 0.944 1.057 0.994 1.046 1.
 1.034 0.979 0.986 0.964 0.994 1.037 1.015 1.044 0.996 0.983 1.052 1.014
 1.054 0.987 0.971 0.954 1.004 0.985 1.005 1.02  0.981 0.953 1.043 0.982
 0.998 1.019 1.004 1.004 0.971 0.971 0.965 1.05  0.952 0.963 1.048 1.005
 0.987 1.012 1.025 1.017 1.009 1.035 0.972 0.915 1.032 1.075 0.916 0.95
 0.989 1.027 0.975 0.971 1.022 1.053 1.018 0.949 1.006 0.979 1.092 0.997
 0.979 1.017 1.011 0.994 0.994 0.967 0.994 1.007 0.999 0.98  1.026 0.955
 0.954 0.964 1.073 0.997 1.038 1.007 1.025 1.033 0.961 1.028 1.01  1.008
 0.989 1.014 1.014 0.964]
Out[3]:
[<matplotlib.lines.Line2D at 0x7fd5425cd748>]
In [4]:
# Gaussian random variables
va_normal = np.random.normal(size=(100000))
# hoist the bounds (min/max were each computed twice before)
lo = min(va_normal) - 0.01
hi = max(va_normal) + 0.01
δ = (hi - lo) / 100
# bin left edges: linspace with endpoint=False guarantees exactly 100
# points, whereas np.arange(lo, hi, δ) can produce 101 from floating-point
# rounding of the step, which would break the plots against h2 below
xr = np.linspace(lo, hi, 100, endpoint=False)
h2,norm=count_elements(va_normal,lo,hi,100)

# define the pdf
def gauss(x):
    """Standard normal density N(0, 1) evaluated at x (scalar or ndarray)."""
    coef = 1.0 / np.sqrt(2.0 * np.pi)
    return coef * np.exp(-x ** 2 / 2.0)

# comparison between the exact pdf and the histogram density estimate
# (xr holds the bin edges computed above; the two curves should overlap)
pp.plot(xr,h2/norm)
pp.plot(xr,gauss(xr))
Out[4]:
[<matplotlib.lines.Line2D at 0x7fd542ec48d0>]
In [5]:
# matplotlib can do this directly with hist()
# check the distributions: left = uniform counts, middle = gaussian counts,
# right = gaussian normalized as a density, overlaid with the exact pdf
f,ax = pp.subplots(1,3,figsize=(15,5))
ax[0].hist(va, range=(0,1), bins=10)
ax[1].hist(va_normal,bins=50)
ax[2].hist(va_normal,bins=50,density=True)
ax[2].plot(xr,gauss(xr))
Out[5]:
[<matplotlib.lines.Line2D at 0x7fd5432c2f28>]

We generate correlated variables: if $y<0$ we draw from a Gaussian distribution with negative mean, and with positive mean for $y \geq 0$. Using Bayes' rule: $p(x,y) = p(x|y)p(y)$ with $p(y) \sim \cal{U}[-1;1]$

We obtain $p(x|y) = \cal{N}(-5,1)\,\theta(-y) + \cal{N}(5,1)\,\theta(y)$

In [6]:
# Joint sampling via p(x, y) = p(x|y) p(y):
#   p(y) ~ Uniform(-1, 1)
#   p(x|y) = Normal(-5, 1) for y < 0 and Normal(+5, 1) for y >= 0

va_y = np.random.random(size=(100000)) * 2 - 1
va_x = np.zeros(va_y.shape[0])
# boolean masks instead of np.where index tuples; same elements selected
neg_mask = va_y < 0
pos_mask = ~neg_mask
va_x[neg_mask] = np.random.normal(-5, size=neg_mask.sum())
va_x[pos_mask] = np.random.normal(5, size=pos_mask.sum())

# scatter of the joint samples: two clusters at x ≈ ±5, split by the sign of y
pp.scatter(va_x,va_y)
Out[6]:
<matplotlib.collections.PathCollection at 0x7fd542057e48>
In [7]:
# 2D histogram estimate of the joint density p(x, y)
fig = pp.figure(figsize=(10, 5))
pp.hist2d(va_x,va_y,bins=100,density=True)
pp.colorbar(orientation='horizontal')
Out[7]:
<matplotlib.colorbar.Colorbar at 0x7fd5421ba8d0>
In [10]:
# Law of large numbers and convergence
# start with Bernoulli variables

# Nva: log-spaced sample sizes to test
Nva = 10**np.arange(1,8,0.2)
print(Nva)
# number of times to repeat the process (empirical means are averaged)
NRESH = 1
# accumulate in a Python list and convert once at the end: calling
# np.append inside the loop copies the whole array on every iteration
means = []
for n in Nva:
    m = 0
    for _ in range(NRESH):
        # randint(2) draws fair Bernoulli 0/1 variables
        v_b = np.random.randint(2, size=int(n))
        m += np.mean(v_b)
    means.append(m / NRESH)
arr_mean = np.array(means)

# shape of the curves vs the square-root power law: the empirical deviation
# |mean - 1/2| should decay like n^(-1/2); the second curve is that
# reference line, anchored at the first data point
pp.loglog(Nva,np.abs(arr_mean-0.5))
pp.loglog(Nva,Nva**(-0.5)/Nva[0]**(-0.5)*np.abs(arr_mean-0.5)[0])
[1.00000000e+01 1.58489319e+01 2.51188643e+01 3.98107171e+01
 6.30957344e+01 1.00000000e+02 1.58489319e+02 2.51188643e+02
 3.98107171e+02 6.30957344e+02 1.00000000e+03 1.58489319e+03
 2.51188643e+03 3.98107171e+03 6.30957344e+03 1.00000000e+04
 1.58489319e+04 2.51188643e+04 3.98107171e+04 6.30957344e+04
 1.00000000e+05 1.58489319e+05 2.51188643e+05 3.98107171e+05
 6.30957344e+05 1.00000000e+06 1.58489319e+06 2.51188643e+06
 3.98107171e+06 6.30957344e+06 1.00000000e+07 1.58489319e+07
 2.51188643e+07 3.98107171e+07 6.30957344e+07]
Out[10]:
[<matplotlib.lines.Line2D at 0x7fd541a59630>]
In [9]:
# sum of random variables: convergence to the Gaussian law (CLT)
n_var = 10000   # number of independent sums
n_sum = 10000   # number of Bernoulli terms per sum
# preallocate instead of np.append inside the loop: np.append copies the
# whole array on every call, i.e. O(n_var^2) total work
est_m = np.empty(n_var)
for i in range(n_var):
    est_m[i] = np.sum(np.random.randint(2, size=n_sum))

# standardize: each Bernoulli term has mean 1/2 and std 1/2, so
# Z = sqrt(n) * (mean - 1/2) / (1/2) should be approximately N(0, 1)
Z = np.sqrt(n_sum)*(est_m/n_sum - 0.5)/0.5
# histogram of Z against the exact standard normal pdf.
# NOTE(review): xr comes from the earlier gaussian cell and spans the range
# of va_normal, not of Z — acceptable here since both are ~N(0,1), but the
# grids are not guaranteed to coincide; confirm if reused.
pp.hist(Z,bins=40,density=True)
pp.plot(xr,gauss(xr))
Out[9]:
[<matplotlib.lines.Line2D at 0x7fd541b7c588>]
In [ ]: