תרגול 1 - חזרה על הסתברות וחיזוי

Setup¶

## Importing packages
import os # A build in package for interacting with the OS. For example to create a folder.
import numpy as np  # Numerical package (mainly multi-dimensional arrays and linear algebra)
import pandas as pd  # A package for working with data frames
import matplotlib.pyplot as plt  # A plotting package
import imageio  # A package to read and write image (is used here to save gif images)

## Setup matplotlib to output figures into the notebook
## - To make the figures interactive (zoomable, tooltip, etc.) use ""%matplotlib notebook" instead
%matplotlib inline

## Setting some nice matplotlib defaults
plt.rcParams['figure.figsize'] = (4.5, 4.5)  # Set default plot's sizes
plt.rcParams['figure.dpi'] = 120  # Set default plot's dpi (increase fonts' size)
plt.rcParams['axes.grid'] = True  # Show grid by default in figures

## Auxiliary functions for printing equations, pandas tables and images in the cells' output.
## They are imported from the public IPython.display module, since importing
## them through IPython.core.display is deprecated.
from IPython.display import display, HTML, Latex

## Create the output folder.
## exist_ok=True makes this a no-op when the folder already exists, without a
## separate check-then-create step.
os.makedirs('./output', exist_ok=True)

Patients Distribution Model¶

Pulse vs Number of Coughs¶

.	$\text{c}=0$	$\text{c}=1$	$\text{c}=2$	$\text{c}=3$
$\text{p}=50$	0	0.15	0.2	0.05
$\text{p}=60$	0.08	0.03	???	0.04
$\text{p}=70$	0.02	0.03	0.04	0.01
$\text{p}=80$	0.1	0.05	0.05	0.1

## p_{p,c} - joint distribution of the pulse (p) and the number of coughs (c)
## --------------------------------------------------------------------------
## Rows are indexed by the pulse values and columns by the number of coughs.
## The unknown entry (p=60, c=2) is stored as NaN.
p_values = np.array([50, 60, 70, 80])
c_values = np.array([0, 1, 2, 3])
p_pc = pd.DataFrame(
    np.array([
        [0.00, 0.15, 0.20, 0.05],
        [0.08, 0.03, np.nan, 0.04],
        [0.02, 0.03, 0.04, 0.01],
        [0.10, 0.05, 0.05, 0.10],
    ]),
    index=p_values,
    columns=c_values,
)
display(HTML(p_pc.to_html()))

Ex. 1.2¶

Sec. 1.2.1¶

Calculating $p_{\text{p},\text{c}}(60,2)$

## The entries of a joint distribution sum to 1, so the missing entry equals
## 1 minus the sum of all the known entries (nansum ignores the NaN entry).
## The entry is filled only while it is still NaN, so that re-running this cell
## does not overwrite the computed value with 1 - 1 = 0.
if pd.isna(p_pc.loc[60, 2]):
    p_pc.loc[60, 2] = 1 - np.nansum(p_pc)
display(
    Latex(r'$p_{\text{p},\text{c}}(60,2)=' + f'{p_pc.loc[60, 2]:.3}$'),
    Latex(r'$p_{\text{p},\text{c}}$:'),
    HTML(p_pc.to_html()),
    )

Sec. 1.2.2¶

Calculating $p_{\text{p}\lvert\text{c}}(60\lvert \text{c}=0)$

## Conditional probability: p_{p|c}(60|0) = p_{p,c}(60,0) / p_c(0),
## where p_c(0) is the sum of the c=0 column of the joint distribution.
p_p_60_given_c_0 = p_pc.loc[60, 0] / p_pc.loc[:, 0].sum()
## The label matches the computed quantity: p given c, evaluated at p=60, c=0
display(Latex(r'$p_{\text{p}|\text{c}}(60|0)=' + f'{p_p_60_given_c_0:.3}$'))

Sec. 1.2.3¶

Calculating $p_{\text{p}}$

## Marginal distribution of the pulse: sum the joint distribution over the
## number of coughs, i.e. along each row of the table
p_p = p_pc.sum(axis='columns')
display(
    Latex(r'$p_{\text{p}}(p)$:'),
    HTML(p_p.to_frame().to_html()),
)

Calculating $\text{Pr}(\text{p}_1\geq70\cup\text{p}_2\geq70\cup\ldots\cup\text{p}_{10}\geq70)$

## Probability that at least one out of 10 patients has a pulse of 70 or more.
## Assuming the patients are independent, the union is the complement of the
## event "all 10 patients have a pulse below 70":
##   Pr(union) = 1 - (1 - Pr(p >= 70)) ** 10
## (Raising Pr(p >= 70) itself to the power of 10 would give the probability of
## the intersection, i.e. that all 10 patients have a pulse of 70 or more.)
p_geq_70 = p_p[p_values >= 70].sum()
prob = 1 - (1 - p_geq_70) ** 10
display(Latex(r'$\text{Pr}(\text{p}_1\geq70\cup\text{p}_2\geq70\cup\ldots\cup\text{p}_{10}\geq70)=' + f'{prob:.3}$'))

Ex. 1.3¶

## Import the normal distribution model from SciPy
from scipy.stats import norm

## Model of the body temperature given the pulse:
## for each pulse value p, t is normal with mean 32 + 0.1 * p and standard deviation sigma
sigma = 1
temp_grid = np.arange(35, 42, 0.01)  # Grid of temperatures used for the plots
p_t_given_p_dist = {}
for p in p_values:
    p_t_given_p_dist[p] = norm(32 + 0.1 * p, sigma)


def p_t_given_p(t, p):
    """Return the conditional density p_{t|p}(t|p) of the temperature t for the pulse value p."""
    return p_t_given_p_dist[p].pdf(t)


## Plotting one curve per pulse value
fig, ax = plt.subplots(figsize=(4.5, 3))
for p in p_values:
    density = p_t_given_p(temp_grid, p)
    ax.plot(temp_grid, density, label=f'$p={p}$')
ax.set(
    title=r'$p_{t|p}(t|p)$',
    xlabel='Body temperature t[°]',
    ylabel=r'$p_{t|p}(t|p)$',
)
ax.legend(loc='upper right')
fig.tight_layout()
fig.savefig('./output/dist_t_given_p.png')

Calculating $p_{\text{t},\text{p}}(t,p)=p_{\text{t}|\text{p}}(t|p)p_{\text{p}}(p)$

def p_tp(t, p):
    """Return the joint density p_{t,p}(t,p) = p_{t|p}(t|p) * p_p(p)."""
    return p_t_given_p(t, p) * p_p[p]


## Plotting one curve per pulse value
fig, ax = plt.subplots(figsize=(4.5, 3))
for p in p_values:
    joint_density = p_tp(temp_grid, p)
    ax.plot(temp_grid, joint_density, label=f'$p={p}$')
ax.set(
    title=r'$p_{t,p}(t,p)=p_{t|p}(t|p)p_{p}(p)$',
    xlabel='Body temperature t[°]',
    ylabel=r'$p_{t,p}(t,p)$',
)
## Dashed vertical marker at t=39
ax.plot([39, 39], [0, 0.2], linestyle='--', color='gray')
ax.legend(loc='upper right')
fig.tight_layout()
ax.set(xlim=(35, 42), ylim=(0, 0.2))
fig.savefig('./output/dist_tp.png')

## The joint density at t=39, for each one of the pulse values
p_tp_t_39 = pd.Series(
    data=[p_tp(39, p) for p in p_values],
    index=p_values,
)

display(
    Latex(r'$p_{\text{t},\text{p}}(39,p)$:'),
    HTML(p_tp_t_39.to_frame().to_html()),
)

Calculating $p_{\text{t}}(t)=\sum_p p_{\text{t},\text{p}}(t,p)$

def p_t(t):
    """Return the marginal density p_t(t): the sum of p_{t,p}(t,p) over all the pulse values."""
    per_pulse = np.stack([p_tp(t, p) for p in p_values], axis=0)
    return per_pulse.sum(axis=0)


## Plotting
fig, ax = plt.subplots(figsize=(4.5, 3))
ax.plot(temp_grid, p_t(temp_grid))
ax.set(
    title=r'$p_t(t)$',
    xlabel='Body temperature t[°]',
    ylabel=r'$p_t(t)$',
)
fig.tight_layout()
ax.set_xlim(35, 42)
fig.savefig('./output/dist_t.png')

## The marginal density at t=39
p_39 = p_t(39)

display(Latex(r'$p_\text{t}(39)=' + f'{p_39:.3}$'))

Calculating $p_{\text{p}|\text{t}}(p|t)=\frac{p_{\text{t},\text{p}}(t,p)}{p_{\text{t}}(t)}$

def p_p_given_t(p, t):
    """Return the conditional probability p_{p|t}(p|t) = p_{t,p}(t,p) / p_t(t)."""
    return p_tp(t, p) / p_t(t)


## Plotting one curve per pulse value
fig, ax = plt.subplots(figsize=(4.5, 3))
for p in p_values:
    posterior = p_p_given_t(p, temp_grid)
    ax.plot(temp_grid, posterior, label=f'$p={p}$')
ax.set(
    title=r'$p_{p|t}(p|t)$',
    xlabel='Body temperature t[°]',
    ylabel=r'$p_{p|t}(p|t)$',
)
## Dashed vertical marker at t=39
ax.plot([39, 39], [0, 1], linestyle='--', color='gray')
ax.legend(loc='upper right')
ax.set(xlim=(35, 42), ylim=(0, 1))
fig.tight_layout()
fig.savefig('./output/dist_p_given_t.png')

def p_p_given_t_df(t):
    """Return p_{p|t}(p|t) as a table with a row per pulse value and a column per temperature in t.

    t is used as the columns' labels, so it is passed as a sequence of temperatures.
    """
    rows = [p_p_given_t(p, t) for p in p_values]
    return pd.DataFrame(rows, index=p_values, columns=t)


## Plotting the conditional probabilities stacked one on top of the other
fig, ax = plt.subplots(figsize=(4.5, 3))
stack_labels = [f'$p={p}$' for p in p_values]
ax.stackplot(temp_grid, p_p_given_t_df(temp_grid).values, labels=stack_labels)
ax.set(
    title=r'$F_{p|t}(p|t)$ (Cumulative Distribution)',
    xlabel='Body temperature t[°]',
    ylabel=r'$F_{p|t}(p|t)$',
)
## Dashed vertical marker at t=39
ax.plot([39, 39], [0, 1], linestyle='--', color='black')
ax.legend(loc='upper right')
fig.tight_layout()
ax.set(xlim=(35, 42), ylim=(0, 1))
fig.savefig('./output/dist_p_given_t_stack.png')

display(
    Latex(r'$p_{\text{p}|\text{t}}(p|39)$:'),
    HTML(p_p_given_t_df([39]).to_html()),
)

Ex. 2.5¶

def p_hat_misclass(t):
    """Predictor for the misclassification rate: the most probable pulse given t (the mode of p_{p|t})."""
    return p_p_given_t_df(t).idxmax(axis=0).values


def p_hat_mae(t):
    """Predictor for the MAE: the first pulse value at which the cumulative sum of p_{p|t} exceeds 0.5 (the median)."""
    # idxmax over a boolean table returns the label of the first True in each column
    return (p_p_given_t_df(t).cumsum(axis=0) > 0.5).idxmax(axis=0).values


def p_hat_mse(t):
    """Predictor for the MSE: the conditional mean of the pulse, sum_p p * p_{p|t}(p|t)."""
    return (p_values[:, None] * p_p_given_t_df(t)).sum(axis=0)


## Plotting the three predictors as a function of the temperature
fig, ax = plt.subplots(figsize=(4.5, 3))
ax.set_title('Optimal Predictors for Different Risks')
ax.plot(temp_grid, p_hat_misclass(temp_grid), label='Misclassification')
ax.plot(temp_grid, p_hat_mae(temp_grid), label='MAE')
ax.plot(temp_grid, p_hat_mse(temp_grid), label='MSE')
ax.set_xlabel('Body temperature t[°]')
ax.set_ylabel(r'$\hat{p}$')
## Dashed vertical marker at t=39
ax.plot([39, 39], [p_values[0], p_values[-1]], '--', color='gray')
ax.legend(loc='upper left')
ax.set_xlim(37, 40)
ax.set_ylim(49, 81)
plt.tight_layout()
fig.savefig('./output/p_predictors.png')

	0	1	2	3
50	0.00	0.15	0.20	0.05
60	0.08	0.03	NaN	0.04
70	0.02	0.03	0.04	0.01
80	0.10	0.05	0.05	0.10

	0
50	0.4
60	0.2
70	0.1
80	0.3

	0
50	0.021596
60	0.048394
70	0.039894
80	0.072591

	39
50	0.118352
60	0.265208
70	0.218627
80	0.397812