%matplotlib inline
from ipywidgets import *
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')
import numpy as np
import scipy.stats as stats
import matplotlib.patches as mpatches
def fill(lower, upper, mu, sigma, col='#add8e6', ax=None):
    """Shade the area under the N(mu, sigma) density between two bounds.

    The density is evaluated at 100 evenly spaced points from ``lower`` to
    ``upper`` and the region between the curve and zero is filled with
    colour ``col`` at 50% opacity.

    Args:
        lower: Left end of the shaded interval.
        upper: Right end of the shaded interval.
        mu: Mean of the normal distribution.
        sigma: Standard deviation of the normal distribution.
        col: Fill colour.
        ax: Object whose ``fill_between`` is used; when falsy (the default
            ``None``), ``plt.fill_between`` is called instead.
    """
    grid = np.linspace(lower, upper, 100)
    density = stats.norm.pdf(grid, mu, sigma)
    # Truthiness test (not an ``is None`` check): any falsy ``ax`` means pyplot.
    target = ax if ax else plt
    target.fill_between(grid, density, color=col, alpha=0.5)
def norm_filled(mu, sigma, filled=False, ax=None):
    """Plot the N(mu, sigma) density over mu +/- 3.5 * sigma.

    Args:
        mu: Mean of the normal distribution.
        sigma: Standard deviation of the normal distribution.
        filled: When true, the whole area under the plotted curve is also
            shaded via ``fill`` (with its default colour).
        ax: Object whose ``plot`` is used; when falsy (the default ``None``),
            ``plt.plot`` is called instead.

    Returns:
        The tuple ``(x_min, x_max)`` with the ends of the plotted range.
    """
    half_width = 3.5 * sigma
    x_min, x_max = mu - half_width, mu + half_width
    grid = np.linspace(x_min, x_max, 100)
    density = stats.norm.pdf(grid, mu, sigma)
    # Truthiness test (not an ``is None`` check): any falsy ``ax`` means pyplot.
    canvas = ax if ax else plt
    canvas.plot(grid, density)
    if filled:
        fill(x_min, x_max, mu, sigma, ax=ax)
    return x_min, x_max
def iq_sample_mean_dist(x_bar=101, normalized=False, c_crit=False, n=30):
    """Draw the sampling distribution of the mean IQ and mark an observed mean.

    The population is taken as N(100, 15), so the mean of ``n`` observations
    has standard error ``15 / sqrt(n)``.  The curve is drawn either on the
    original IQ scale or, when ``normalized`` is true, on the standard
    normal (z) scale.

    Args:
        x_bar: Observed sample mean on the IQ scale.  It is converted to a
            z-score when ``normalized`` is true.
        normalized: Plot on the N(0, 1) scale instead of the IQ scale.
        c_crit: When true, shade the right-sided critical region in red.
        n: Sample size used for the standard error.
    """
    # Significance level of the right-sided test; the critical value is derived
    # from it rather than hard-coded.  The previous literal 1.96 is the 0.975
    # quantile, i.e. a right tail of only 2.5%.
    alpha = 0.05
    pop_mean, pop_sd = 100, 15
    std_err = pop_sd / np.sqrt(n)

    mu = 0 if normalized else pop_mean
    sigma = 1 if normalized else std_err

    norm_filled(mu, sigma, True)
    plt.xlabel(r"$\bar{X}_{IQ}$")
    # Slightly narrower than the plotted +/- 3.5 sigma range.
    plt.xlim(mu - 3.25 * sigma, mu + 3.25 * sigma)

    if normalized:
        x_bar = (x_bar - pop_mean) / std_err
    plt.vlines(x=x_bar, ymin=0, ymax=stats.norm.pdf(x_bar, mu, sigma))
    plt.gca().set_ylim(bottom=0)

    if c_crit:
        # Quantile taken directly on the plotted scale (IQ or z).
        crit = stats.norm.ppf(1 - alpha, mu, sigma)
        fill(crit, mu + 3.5 * sigma, mu, sigma, "#ff4c4c")

    label = r"$z=$" if normalized else r"$\bar{x}=$"
    plt.title(label + str(round(x_bar, 2)))
def critical_region(test_type="right"):
    """Draw the standard normal density with the critical region shaded.

    The significance level is fixed at 0.05.  The rejection region is drawn
    in red and the remaining area in the default fill colour; the title
    names the test and the form of the critical set.

    Args:
        test_type: ``"left"`` for a left-sided test, ``"right"`` for a
            right-sided test.  Any other value is drawn as a two-sided test.
    """
    mu, sigma = 0, 1
    alpha = 0.05
    reject_col = "#ff4c4c"
    x_min, x_max = norm_filled(mu, sigma)

    if test_type == "left":
        z_crit = stats.norm.ppf(alpha, mu, sigma)
        fill(x_min, z_crit, mu, sigma, reject_col)
        fill(z_crit, x_max, mu, sigma)
        plt.title(r"Test lewostronny: $C_{kr} = (-\infty, -z_{kr})$")
        plt.text(-2.25, 0.02, r"$\alpha\%$", fontsize=12)
    elif test_type == "right":
        z_crit = stats.norm.ppf(1 - alpha, mu, sigma)
        fill(x_min, z_crit, mu, sigma)
        fill(z_crit, x_max, mu, sigma, reject_col)
        plt.title(r"Test prawostronny: $C_{kr} = (z_{kr},\infty)$")
        plt.text(1.75, 0.02, r"$\alpha\%$", fontsize=12)
    else:
        # alpha is split between the tails; z_crit is the (negative) lower cut-off.
        z_crit = stats.norm.ppf(alpha / 2, mu, sigma)
        fill(z_crit, -1 * z_crit, mu, sigma)
        fill(x_min, z_crit, mu, sigma, reject_col)
        fill(-1 * z_crit, x_max, mu, sigma, reject_col)
        # Title now follows the same "Test <type>:" pattern as the other two
        # branches (the stray colon after "Test" was removed).
        plt.title(r"Test dwustronny: $C_{kr} = (-\infty, -z_{kr}) \cup (z_{kr},\infty)$")
        plt.text(2, 0.005, r"$\alpha/2\%$", fontsize=10)
        plt.text(-2.5, 0.005, r"$\alpha/2\%$", fontsize=10)
def p_value(x_bar=106):
    """Show the critical region and the p-value side by side on N(0, 1).

    Two panels are created.  Both show the filled standard normal density
    and a vertical line at the z-score of ``x_bar`` (computed for mean 100,
    standard deviation 15 and a sample of 30).  The left panel shades the
    right-sided critical region for alpha = 0.05 in red and is titled with
    the z-score; the right panel shades the tail from the z-score to 3.5 in
    orange and is titled with the p-value rounded to 4 places.

    Args:
        x_bar: Observed sample mean on the IQ scale.
    """
    mu, sigma, alpha = 0, 1, 0.05
    _, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=(12,5))
    z_crit = stats.norm.ppf(1 - alpha, mu, sigma)

    for panel in (left_ax, right_ax):
        norm_filled(mu, sigma, True, panel)
    fill(z_crit, 3.5, mu, sigma, "#ff4c4c", left_ax)

    # Standardise the observed mean; from here on everything is on the z scale.
    z_obs = (x_bar - 100) / (15 / np.sqrt(30))
    line_top = stats.norm.pdf(z_obs, mu, sigma)

    left_ax.vlines(x=z_obs, ymin=0, ymax=line_top)
    left_ax.set_title(r"z=" + str(round(z_obs, 2)))

    right_ax.vlines(x=z_obs, ymin=0, ymax=line_top)
    fill(z_obs, 3.5, mu, sigma, "#ffa500", right_ax)
    upper_tail = 1 - stats.norm.cdf(z_obs, mu, sigma)
    right_ax.set_title(r"p-value=" + str(round(upper_tail, 4)))
def alpha_beta(alpha=0.05, mu2=107, n=30):
    """Plot two sampling distributions with alpha and beta shaded.

    Two normal densities with common standard error ``15 / sqrt(n)`` are
    drawn: one centred at 100 and one centred at ``mu2``.  The red area is
    the right tail of the first curve beyond the critical value (size
    ``alpha``); the green area is the part of the second curve below that
    critical value.  The title reports beta rounded to 2 places.

    Args:
        alpha: Right-tail probability under the curve centred at 100.
        mu2: Centre of the second curve.
        n: Sample size used for the standard error.
    """
    mu1 = 100
    sigma = 15 / np.sqrt(n)
    reject_col, miss_col = "#ff4c4c", "#98fb98"

    for centre in (mu1, mu2):
        grid = np.linspace(centre - 3.5 * sigma, centre + 3.5 * sigma, 100)
        plt.plot(grid, stats.norm.pdf(grid, centre, sigma))

    # Critical value on the original scale, taken under the first curve.
    z_crit = stats.norm.ppf(1 - alpha, mu1, sigma)
    fill(z_crit, mu1 + 3.5 * sigma, mu1, sigma, reject_col)
    fill(mu2 - 3.5 * sigma, z_crit, mu2, sigma, miss_col)
    plt.gca().set_ylim(bottom=0)

    beta = stats.norm.cdf(z_crit, mu2, sigma)
    plt.title(r"$\beta=$" + str(round(beta, 2)))

    legend_entries = [
        mpatches.Patch(color='#ff4c4c', label=r'$\alpha$', alpha=0.5),
        mpatches.Patch(color='#98fb98', label=r'$\beta$', alpha=0.5),
    ]
    plt.legend(handles=legend_entries)
$IQ \sim N(100, 15)$
$\overline{IQ}_{30} \sim N(100, \frac{15}{\sqrt{30}})$
interact(iq_sample_mean_dist, x_bar=(100, 110, 1), normalized=False, c_crit=False, n=fixed(30))  # x_bar: slider 100..110 step 1; normalized / c_crit: checkboxes; n held at 30 (no widget)
Przykład: $X \sim N(\mu, 15)$
$H_0: \theta = \theta_0$
$H_1: \theta > \theta_0$
$H_0: \theta = \theta_0$
$H_1: \theta < \theta_0$
$H_0: \theta = \theta_0$
$H_1: \theta \neq \theta_0$
Przykład:
$\mu_0 = 100$
Układ hipotez:
$H_0: \mu = 100$
$H_1: \mu > 100$
Przykład: zmienna o rozkładzie normalnym ze znaną wariancją:
$Z=\frac{\bar{X}-\mu_0}{\frac{\sigma}{\sqrt{n}}}$
interact(critical_region, test_type=["left", "right", "two-sided"])  # test_type: dropdown; "two-sided" is handled by critical_region's else branch
$Z \in C_{kr}: \textrm{ Odrzucamy } H_0 \textrm{ na rzecz } H_1$
$Z \notin C_{kr}: \textrm{ Brak podstaw do odrzucenia } H_0$
Przykład: test prawostronny, $\alpha=0.05$
$C_{kr} = (1.644854, \infty)$
Dla $\bar{x} = 101$:
$Z = \frac{101-100}{\frac{15}{\sqrt{30}}} = 0.3651484$
$Z \notin C_{kr}$
$ \textrm{ Brak podstaw do odrzucenia } H_0$
Dla $\bar{x} = 106$:
$Z = \frac{106-100}{\frac{15}{\sqrt{30}}} = 2.19089$
$Z \in C_{kr}$
$ \textrm{ Odrzucamy } H_0 \textrm{ na rzecz } H_1$
interact(p_value, x_bar=(100, 110, 1))  # x_bar: slider 100..110 step 1 (sample mean on the IQ scale)
Przykład:
$\alpha = 0.05$
$P(\bar{X}>106|\mu=100)$
$=P(Z>\frac{106-100}{{\frac{15}{\sqrt{30}}}})$
$=P(Z>2.19089) \approx 0.014$
$p < \alpha$
$ \textrm{ Odrzucamy } H_0 \textrm{ na rzecz } H_1$
$H_0: \mu = \mu_0$
$H_1: \mu > / \neq / < \mu_0$
| | Odrzucamy $H_0$ | Nie odrzucamy $H_0$ |
|---|---|---|
| $H_0$ prawdziwa | Błąd I rodzaju ($\alpha$) | Decyzja właściwa |
| $H_0$ fałszywa | Decyzja właściwa ($1-\beta$) | Błąd II rodzaju ($\beta$) |
interact(alpha_beta, alpha=(0.01, 0.25, 0.01), mu2=(100, 110, 1), n=(30, 60, 5))  # sliders: alpha 0.01..0.25, mu2 100..110, n 30..60 in steps of 5
$H_0: p = p_0$
$H_1: p < p_0 $
$ S_n = \sum_{i=1}^{n}X_i \sim B_n(p) $
$ S_n = \sum_{i=1}^{n}X_i \sim B_n(p) $
$E[S_n] = n \cdot p_0$
$D^2[S_n] = n \cdot p_0 \cdot(1-p_0)$
$D[S_n] = \sqrt{n \cdot p_0 \cdot(1-p_0)}$
$Z = \frac{S_n - E[S_n]}{D[S_n]} = \frac{S_n - n \cdot p_0}{\sqrt{n \cdot p_0 \cdot (1 - p_0)}}$
$ Z \sim N(0,1) $ (w przybliżeniu, dla dużych $n$)
$ \bar{X} = \frac{\textrm{liczba sukcesów}}{n} $
$E[\bar{X}] = p_0$
$D[\bar{X}] = \sqrt{\frac{{p_0(1-p_0)}}{n}}$
$Z = \frac{\bar{X}-E[\bar{X}]}{D[\bar{X}]}=\frac{\bar{X}-p_0}{\sqrt{\frac{{p_0(1-p_0)}}{n}}}$