Normal distributions (aka Gaussian distribution) example#

figures/men/gauss.jpg
Unless specified differently, these slides are copyright CINECA 2019 and are released under the Attribution--NonCommercial--NoDerivs (CC BY-NC-ND) Creative Commons license, version 3.0. Uses not allowed by the above license need explicit, written permission from the copyright owner. For more information see: http://creativecommons.org/licenses/by-nc-nd/3.0/

Slides were authored by Sergio Orlandini.

import numpy as np
import matplotlib.pyplot as plt

Normal distribution#

$$ \large g(x) = \frac{1}{\sigma\sqrt{2 \pi}} e^{-\frac{(x - \mu)^2}{2 \sigma^2}}$$

# function domain

xmin =  0.
xmax = 10.
num_points = 200

x = np.linspace(xmin, xmax, num_points)
def gaussian(x, mu, sigma):
    """normal distribution"""
    return 1./(sigma * np.sqrt(2.*np.pi)) * np.exp(-(x - mu)**2 / (2.*sigma**2))
# distribution parameters

mu = 4.2
sigma = 1.
# iterating over domain

%time gauss = np.array([gaussian(i, mu, sigma) for i in x])
CPU times: user 1.49 ms, sys: 587 µs, total: 2.08 ms
Wall time: 2.09 ms
# numpy ufunc

%time gauss = gaussian(x, mu, sigma)
CPU times: user 117 µs, sys: 0 ns, total: 117 µs
Wall time: 123 µs
plt.plot(x, gauss)
plt.show()
_images/fa23b755fc7a692f4b732d833f780f8aac314240f9859007bfc2b3b4ee0b96fc.png
print("max =", gauss.max(), "at position", gauss.argmax(), ", x =", x[gauss.argmax()])
print("min =", gauss.min(), "at position", gauss.argmin(), ", x =", x[gauss.argmin()])
max = 0.3988534372131259 at position 84 , x = 4.2211055276381915
min = 1.9773196406244672e-08 at position 199 , x = 10.0
plt.plot(x, gaussian(x, 4.2, 1.0), label="$\mu = 4.2$, $\sigma = 1.0$")
plt.plot(x, gaussian(x, 6.2, 1.8), label="$\mu = 6.2$, $\sigma = 1.8$")
plt.plot(x, gaussian(x, 2.2, 1.2), label="$\mu = 2.2$, $\sigma = 1.2$")

plt.legend()

plt.show()
_images/30d6137fbb72389bad408637e90f8b783ef96af4ced8c107b5ceeb6ade1831f0.png

Sample a normal distribution#

np.random.normal

normal(loc=0.0, scale=1.0, size=None)
    Draw random samples from a normal (Gaussian) distribution.
sample = np.random.normal(loc=mu, scale=sigma, size=10000)

plt.hist(sample, bins=21, density=1, alpha=0.7, label="samples")
plt.plot(x, gauss, color='r', label="normal distribution")

plt.legend()
plt.show()
_images/73a23dcd6371be82cc9fa12f5b936257fa33faf9c20b9ae1396e60fa7facfb9d.png

Multivariate normal distribution#

The multivariate normal distribution is a generalization of the one-dimensional (univariate) normal distribution to higher dimensions.

The multivariate normal distribution $ \mathcal{N} ( \pmb\mu, \pmb\Sigma) $ of a d-dimensional random vector is:

$$ \large \frac{1}{\sqrt{2 \pi^d |\pmb\Sigma|}} ; \exp \Big( -\frac{1}{2} (\pmb x - \pmb\mu)^T \pmb\Sigma^{-1} (\pmb x - \pmb\mu) \Big) $$

where $\pmb x$ is a d-dimensional vector, $\pmb \Sigma$ is the covariance matrix, $|\pmb\Sigma|$ its determinant and $\pmb \mu$ is the mean vector

distribution domains#

In order to evaluate function on a grid meshgrid is very useful.

np.meshgrid

meshgrid(*xi, **kwargs)
    Return coordinate matrices from coordinate vectors.
    
    Make N-D coordinate arrays for vectorized evaluations of
    N-D scalar/vector fields over N-D grids, given
    one-dimensional coordinate arrays x1, x2,..., xn.
A, B = np.meshgrid(np.arange(3), np.arange(3))

A.shape, B.shape
((3, 3), (3, 3))
A
array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])
B
array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2]])
for i, j in zip(A.ravel(), B.ravel()):
    print(i, j)
0 0
1 0
2 0
0 1
1 1
2 1
0 2
1 2
2 2

np.ravel

ravel(a, order='C')
    Return a contiguous flattened array.

There are two conventions for indexing 2-dimensional arrays.
Matrix notation uses the first index to indicate the row and the second index to indicate the column (Matrix indexing).
This is the opposite of the geometrically oriented-convention for images (Cartesian indexing) where the first index represents x position (i.e., column) and the second represents y position (i.e., row)

see here for a detailed explanation.

The meshgrid supports both types of indexing conventions by defining indexing keyword argument.
With "ij" string the matrix indexing is selected, while using "xy" the cartesian indexing is used.
Cartesian indexing is the default.

In the case of 2D-arrays of dimension M and N
the output shape of "xy" cartesian indexing is (N, M)
while is (M, N) for "ij" matrix indexing.

Well. This is intriguing!

n = 3

A, B = np.meshgrid(np.arange(n), np.arange(n), indexing='ij')

for i, j in zip(A.ravel(), B.ravel()):
    print(i, j)
0 0
0 1
0 2
1 0
1 1
1 2
2 0
2 1
2 2
A, B = np.meshgrid(np.arange(n), np.arange(n), indexing='xy') # default

for i, j in zip(A.ravel(), B.ravel()):
    print(i, j)
0 0
1 0
2 0
0 1
1 1
2 1
0 2
1 2
2 2

distribution domains#

x1 = np.linspace(xmin, xmax, num_points)
x2 = np.linspace(xmin, xmax, num_points)

X1, X2 = np.meshgrid(x1, x2)
X1.shape, X2.shape
((200, 200), (200, 200))

distribution parameters#

muv = np.array([mu, mu + 3])

muv
array([4.2, 7.2])
#cov = np.array([[1, 0], [0, 1]])
cov = np.array([[1, 0.6], [0.6, 1]])

cov
array([[1. , 0.6],
       [0.6, 1. ]])

density probability function#

$$ \large \frac{1}{\sqrt{2 \pi^d |\pmb\Sigma|}} ; \exp \Big( -\frac{1}{2} (\pmb x - \pmb\mu)^T \pmb\Sigma^{-1} (\pmb x - \pmb\mu) \Big) $$

def gaussian_multivariate(x, mu, cov):
    pre_factor = 1.0 / np.sqrt((2*np.pi)**2 * np.linalg.det(cov))
    xm = x - mu
    lg = np.dot(np.linalg.inv(cov), xm)
    lg = np.dot(xm.T, lg)
    return pre_factor * np.exp(- lg / 2)
def gaussian_multivariate(x, mu, cov):
    pre = 1.0 / np.sqrt((2*np.pi)**2 * np.linalg.det(cov))
    xm = x - mu
    lg = np.linalg.solve(cov, xm).T.dot(xm) # Xm^T S^-1 Xm
    return pre * np.exp(- lg / 2)
pdf = np.empty((num_points, num_points))

for i in np.arange(num_points):
    for j in np.arange(num_points):
        point = (X1[i,j], X2[i,j])
        pdf[i,j] = gaussian_multivariate(point, muv, cov)
pdf = np.array([gaussian_multivariate(p, muv, cov) for p in zip(X1.ravel(), X2.ravel())])

pdf.shape = (num_points, num_points)
from matplotlib import cm

plt.imshow(pdf, origin='lower', extent=(xmin, xmax, xmin, xmax), cmap=cm.coolwarm)
plt.xlabel("X1")
plt.ylabel("X2")

plt.show()
_images/b36b8d946f4ad0c6ddf88bbb214bf24f4fe499380ab3cbf2a316cde7fff8a241.png
from scipy.stats import multivariate_normal

pos = np.dstack((X1, X2))

rv = multivariate_normal(muv, cov)
Z = rv.pdf(pos)
plt.imshow(pdf, origin='lower', extent=(xmin, xmax, xmin, xmax), cmap=cm.coolwarm)

plt.show()
_images/be5c1173cdee5a6adb38ddc5ee4ba6c8e51fef7f5ad3ff8e50cf91d1f3e6b5e9.png
#%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
#ax = fig.gca(projection='3d')
ax = fig.add_subplot(projection='3d')

surface = ax.plot_surface(X1, X2, pdf, alpha=0.95, cmap='viridis')

plt.xlabel("X1")
plt.ylabel("X2")

max_value = pdf.max()
offset = max_value * 0.1

cset = ax.contourf(X1, X2, pdf, zdir='z', offset=-offset, cmap=cm.coolwarm)
ax.set_zlim(-offset, max_value)

plt.show()
_images/0c4496ca31d71915059abdcc9fa6f778e289e02159cb1d9d466e646c47a862bc.png

Marginal distribution#

The marginal distribution is the distribution of a subset of variables from the original distribution. It represents the probability of the subset variables without reference of the irrelevant variables.
In order to compute marginal distribution over a variable in a multivariate normal distribution the irrelevant variable must be dropped out from the covariance matrix and from the mean vector.

Thus means that the marginal distributions for a bidimensional multivariate normal distribution of two variables $\text X1$ and $\text X2$ are:

$$

(1)#\[\begin{align} \mathbf{p}(\text X1) &= \mathcal{N} ( \pmb\mu_{\text X1}, \pmb\Sigma_{\text X1}) \\ \mathbf{p}(\text X2) &= \mathcal{N} ( \pmb\mu_{\text X2}, \pmb\Sigma_{\text X2}) \end{align}\]

$$

marginal_x1 = gaussian(x1, muv[0], cov[0,0])
marginal_x2 = gaussian(x2, muv[1], cov[1,1])
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

fig = plt.figure(figsize=(10, 10))
grid = gridspec.GridSpec(2, 2, width_ratios=[2, 1], height_ratios=[2, 1])

plt.subplot(grid[0])

plt.imshow(pdf, origin='lower', extent=(xmin, xmax, xmin, xmax), cmap=cm.coolwarm)
plt.xlabel("X1")
plt.ylabel("X2")

plt.subplot(grid[1])

plt.ylim(xmin, xmax)
plt.plot(marginal_x2, x, color='red')

plt.subplot(grid[2])

plt.xlim(xmin, xmax)
plt.plot(x, marginal_x1, color='blue')

plt.show()
_images/dbe96c32245ea0735d3302f32615c4e0af2e990701f09ca8afcbf81c90aec9eb.png

Conditional distribution#

In probability theory given two random variables $X$ and $Y$, the conditional probability of $Y$ given $X$ is the probability of $Y$ when $X$ is know.
The conditional distribution of a multivariate normal distribution is given by $$ \pmb p_{x|y} = \mathcal{N} ( \pmb\mu_{x|y}, \pmb\Sigma_{x|y}) $$

where

$$

(2)#\[\begin{align} \mu_{x|y} &= \mu_x + Z Y^{-1} (y - \mu_y) \\ \Sigma_{x|y} &= X - Z Y^{-1} Z' \end{align}\]

$$

with

$$ \pmb\Sigma =

(3)#\[\begin{bmatrix} X & Z \\ Z' & Y \end{bmatrix}\]

$$

X = cov[0, 0]; Y = cov[1, 1]; Z = cov[0, 1]

x2_condition = 8

mu_x1 = muv[0] + (Z * (1/Y) * (x2_condition - muv[1]))
cov_x1 = X - Z * (1/Y) * Z

x1_condition = 5

mu_x2 = muv[1] + (Z * (1/X) * (x1_condition - muv[0]))
cov_x2 = Y - Z * (1/X) * Z
conditional_x1 = gaussian(x1, mu_x1, cov_x1)
conditional_x2 = gaussian(x2, mu_x2, cov_x2)
fig = plt.figure(figsize=(10, 10))

grid = gridspec.GridSpec(2, 2, width_ratios=[2, 1], height_ratios=[2, 1])

plt.subplot(grid[0])

plt.imshow(pdf, origin='lower', extent=(xmin, xmax, xmin, xmax), cmap=cm.coolwarm)
plt.xlabel("X1")
plt.ylabel("X2")

plt.axvline(x1_condition, color='red')
plt.axhline(x2_condition, color='blue')

plt.subplot(grid[1])

plt.ylim(xmin, xmax)
plt.plot(conditional_x2, x2, color='red')

plt.subplot(grid[2])

plt.xlim(xmin, xmax)
plt.plot(x1, conditional_x1, color='blue')

plt.show()
_images/5acc3be37fe774f245802469196c3d35dac7d811441fcd3a2a0ac81111ad5b35.png

How to sample from a Multivariance Gaussian Distribution#

In order to sample a random vector $\pmb x$ from a multivariance Gaussian distribution we have to compute $\pmb\mu + \pmb A \pmb z$ where $\pmb A$ is a matrix such that $\pmb A\pmb A^T = \pmb\Sigma$ and can be computed with Cholesky decomposition and $\pmb z$ is a vector whose components are independent standard normal variables.

num_samples = 42

A = np.linalg.cholesky(cov)
z = np.random.normal(size=(2, num_samples))

samples = A.dot(z) + muv.reshape(2, 1)
plt.figure(figsize=(6, 6))

plt.imshow(pdf, origin='lower', extent=(xmin, xmax, xmin, xmax), cmap=cm.coolwarm)
plt.xlabel("X1")
plt.ylabel("X2")

plt.scatter(samples[0,:], samples[1,:], color='cyan')

plt.show()
_images/0f3786662f4f22147f71c11f914de0af7806ce84057849262dbb545da25faf23.png

Exercise#

  • plot conditional probability for:

    • $P_{X1|X2}$ when $X2 = 3$

    • $P_{X2|X1}$ when $X1 = 4.2$

# CODE HERE