{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "from ipywidgets import *\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import set_matplotlib_formats\n",
    "set_matplotlib_formats('svg')\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import matplotlib.patches as mpatches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parameters of the running IQ example used by the plotting helpers below.\n",
    "IQ_MEAN = 100      # mean under H0 (mu_0)\n",
    "IQ_SD = 15         # known population standard deviation\n",
    "SAMPLE_SIZE = 30   # sample size assumed by p_value\n",
    "ALPHA = 0.05       # significance level of the fixed-alpha plots\n",
    "HALF_WIDTH = 3.5   # densities are drawn on mu +/- HALF_WIDTH * sigma\n",
    "\n",
    "REJECT_COLOR = \"#ff4c4c\"   # critical region (alpha)\n",
    "P_VALUE_COLOR = \"#ffa500\"  # tail area beyond the observed statistic\n",
    "BETA_COLOR = \"#98fb98\"     # type II error area\n",
    "\n",
    "\n",
    "def fill(lower, upper, mu, sigma, col='#add8e6', ax=None):\n",
    "    \"\"\"Shade the area under the N(mu, sigma) density between lower and upper.\n",
    "\n",
    "    The density is evaluated with scipy.stats.norm.pdf on 100 evenly spaced\n",
    "    points. Drawing goes to `ax` when one is given, otherwise to pyplot's\n",
    "    current axes. `col` is the fill colour; the fill is half transparent.\n",
    "    \"\"\"\n",
    "    x = np.linspace(lower, upper, 100)\n",
    "    y = stats.norm.pdf(x, mu, sigma)\n",
    "    # Test against None explicitly rather than relying on the truthiness of an Axes.\n",
    "    target = plt if ax is None else ax\n",
    "    target.fill_between(x, y, color=col, alpha=0.5)\n",
    "\n",
    "\n",
    "def norm_filled(mu, sigma, filled=False, ax=None):\n",
    "    \"\"\"Plot the N(mu, sigma) density on mu +/- 3.5 * sigma.\n",
    "\n",
    "    With filled=True the whole area under the curve is shaded as well.\n",
    "    Drawing goes to `ax` when one is given, otherwise to pyplot's current axes.\n",
    "    Returns the plotted range as the tuple (x_min, x_max).\n",
    "    \"\"\"\n",
    "    x_min = mu - HALF_WIDTH * sigma\n",
    "    x_max = mu + HALF_WIDTH * sigma\n",
    "    x = np.linspace(x_min, x_max, 100)\n",
    "    target = plt if ax is None else ax\n",
    "    target.plot(x, stats.norm.pdf(x, mu, sigma))\n",
    "    if filled:\n",
    "        fill(x_min, x_max, mu, sigma, ax=ax)\n",
    "    return x_min, x_max\n",
    "\n",
    "\n",
    "def iq_sample_mean_dist(x_bar=101, normalized=False, c_crit=False, n=30):\n",
    "    \"\"\"Plot the sampling distribution of the mean IQ under H0 with one observed mean.\n",
    "\n",
    "    x_bar      -- observed sample mean on the IQ scale.\n",
    "    normalized -- draw on the standardised Z scale instead of the IQ scale.\n",
    "    c_crit     -- also shade the right-tailed critical region for alpha = 0.05.\n",
    "    n          -- sample size; the standard error is 15 / sqrt(n).\n",
    "\n",
    "    The signature is kept as is because ipywidgets.interact builds one widget\n",
    "    per parameter.\n",
    "    \"\"\"\n",
    "    std_err = IQ_SD / np.sqrt(n)\n",
    "    mu = 0 if normalized else IQ_MEAN\n",
    "    sigma = 1 if normalized else std_err\n",
    "    _, x_max = norm_filled(mu, sigma, True)\n",
    "    # Once the statistic is standardised the axis shows z-values, not IQ points.\n",
    "    plt.xlabel(r\"$Z$\" if normalized else r\"$\\bar{X}_{IQ}$\")\n",
    "    plt.xlim(mu - 3.25 * sigma, mu + 3.25 * sigma)\n",
    "    if normalized:\n",
    "        x_bar = (x_bar - IQ_MEAN) / std_err\n",
    "    plt.vlines(x=x_bar, ymin=0, ymax=stats.norm.pdf(x_bar, mu, sigma))\n",
    "    plt.gca().set_ylim(bottom=0)\n",
    "    label = r\"$z=$\" if normalized else r\"$\\bar{x}=$\"\n",
    "    plt.title(label + str(round(x_bar, 2)))\n",
    "    if c_crit:\n",
    "        # Right-tailed test: the critical value is the (1 - alpha) quantile,\n",
    "        # about 1.645 on the Z scale. 1.96 is the two-sided value and would\n",
    "        # shade a region of only 2.5 %.\n",
    "        crit = stats.norm.ppf(1 - ALPHA, mu, sigma)\n",
    "        fill(crit, x_max, mu, sigma, REJECT_COLOR)\n",
    "\n",
    "\n",
    "def critical_region(test_type=\"right\"):\n",
    "    \"\"\"Shade the critical region of a z-test at alpha = 0.05 on the N(0, 1) density.\n",
    "\n",
    "    test_type -- \"left\" or \"right\" for a one-sided test; any other value\n",
    "                 (the notebook passes \"two-sided\") draws the two-sided region.\n",
    "    \"\"\"\n",
    "    mu, sigma = 0, 1\n",
    "    x_min, x_max = norm_filled(mu, sigma)\n",
    "    if test_type == \"left\":\n",
    "        z_crit = stats.norm.ppf(ALPHA, mu, sigma)\n",
    "        fill(x_min, z_crit, mu, sigma, REJECT_COLOR)\n",
    "        fill(z_crit, x_max, mu, sigma)\n",
    "        plt.title(r\"Test lewostronny: $C_{kr} = (-\\infty, -z_{kr})$\")\n",
    "        plt.text(-2.25, 0.02, r\"$\\alpha\\%$\", fontsize=12)\n",
    "    elif test_type == \"right\":\n",
    "        z_crit = stats.norm.ppf(1 - ALPHA, mu, sigma)\n",
    "        fill(x_min, z_crit, mu, sigma)\n",
    "        fill(z_crit, x_max, mu, sigma, REJECT_COLOR)\n",
    "        plt.title(r\"Test prawostronny: $C_{kr} = (z_{kr},\\infty)$\")\n",
    "        plt.text(1.75, 0.02, r\"$\\alpha\\%$\", fontsize=12)\n",
    "    else:\n",
    "        # ppf(alpha / 2) is the negative, lower critical value; mirror it for the upper tail.\n",
    "        z_crit = stats.norm.ppf(ALPHA / 2, mu, sigma)\n",
    "        fill(z_crit, -z_crit, mu, sigma)\n",
    "        fill(x_min, z_crit, mu, sigma, REJECT_COLOR)\n",
    "        fill(-z_crit, x_max, mu, sigma, REJECT_COLOR)\n",
    "        plt.title(r\"Test dwustronny: $C_{kr} = (-\\infty, -z_{kr}) \\cup (z_{kr},\\infty)$\")\n",
    "        plt.text(2, 0.005, r\"$\\alpha/2\\%$\", fontsize=10)\n",
    "        plt.text(-2.5, 0.005, r\"$\\alpha/2\\%$\", fontsize=10)\n",
    "\n",
    "\n",
    "def p_value(x_bar=106):\n",
    "    \"\"\"Show the critical-region decision next to the p-value for a right-tailed z-test.\n",
    "\n",
    "    x_bar -- observed sample mean on the IQ scale (mu_0 = 100, sigma = 15, n = 30).\n",
    "\n",
    "    Left panel: critical region for alpha = 0.05 and the observed z.\n",
    "    Right panel: the upper-tail area beyond the observed z, i.e. the p-value.\n",
    "    \"\"\"\n",
    "    _, (ax_crit, ax_p) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))\n",
    "    mu, sigma = 0, 1\n",
    "    z_crit = stats.norm.ppf(1 - ALPHA, mu, sigma)\n",
    "    # Keep the observed mean and its z-score in separate names.\n",
    "    z = (x_bar - IQ_MEAN) / (IQ_SD / np.sqrt(SAMPLE_SIZE))\n",
    "    z_height = stats.norm.pdf(z, mu, sigma)\n",
    "    for ax in (ax_crit, ax_p):\n",
    "        _, x_max = norm_filled(mu, sigma, True, ax)\n",
    "    fill(z_crit, x_max, mu, sigma, REJECT_COLOR, ax_crit)\n",
    "    ax_crit.vlines(x=z, ymin=0, ymax=z_height)\n",
    "    ax_crit.set_title(r\"z=\" + str(round(z, 2)))\n",
    "    ax_p.vlines(x=z, ymin=0, ymax=z_height)\n",
    "    fill(z, x_max, mu, sigma, P_VALUE_COLOR, ax_p)\n",
    "    # sf is the upper-tail probability; unlike 1 - cdf it keeps precision for large z.\n",
    "    p_val = stats.norm.sf(z, mu, sigma)\n",
    "    ax_p.set_title(r\"p-value=\" + str(round(p_val, 4)))\n",
    "\n",
    "\n",
    "def alpha_beta(alpha=0.05, mu2=107, n=30):\n",
    "    \"\"\"Visualise the type I (alpha) and type II (beta) error probabilities.\n",
    "\n",
    "    Plots the sampling distribution of the mean under H0 (mean 100) and under\n",
    "    the alternative (mean mu2) for sample size n, shades alpha and beta for a\n",
    "    right-tailed test and shows beta, rounded to two decimals, in the title.\n",
    "    \"\"\"\n",
    "    mu1 = IQ_MEAN\n",
    "    sigma = IQ_SD / np.sqrt(n)\n",
    "    _, h0_max = norm_filled(mu1, sigma)\n",
    "    h1_min, _ = norm_filled(mu2, sigma)\n",
    "    # Critical value on the sample-mean scale (not a z-score).\n",
    "    x_crit = stats.norm.ppf(1 - alpha, mu1, sigma)\n",
    "    fill(x_crit, h0_max, mu1, sigma, REJECT_COLOR)\n",
    "    fill(h1_min, x_crit, mu2, sigma, BETA_COLOR)\n",
    "    plt.gca().set_ylim(bottom=0)\n",
    "    beta = stats.norm.cdf(x_crit, mu2, sigma)\n",
    "    plt.title(r\"$\\beta=$\" + str(round(beta, 2)))\n",
    "    alpha_patch = mpatches.Patch(color=REJECT_COLOR, label=r'$\\alpha$', alpha=0.5)\n",
    "    beta_patch = mpatches.Patch(color=BETA_COLOR, label=r'$\\beta$', alpha=0.5)\n",
    "    plt.legend(handles=[alpha_patch, beta_patch])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Przykład: IQ"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![iq](https://upload.wikimedia.org/wikipedia/commons/thumb/3/39/IQ_distribution.svg/800px-IQ_distribution.svg.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[Źródło](https://upload.wikimedia.org/wikipedia/commons/thumb/3/39/IQ_distribution.svg/800px-IQ_distribution.svg.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$IQ \\sim N(100, 15)$\n",
    "\n",
    "$\\bar{IQ}_{30} \\sim N(100, \\frac{15}{\\sqrt{30}})$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "02b4d5bab7cd49618259cb72bda6d350",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(IntSlider(value=101, description='x_bar', max=110, min=100), Checkbox(value=False, descr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(iq_sample_mean_dist, x_bar=(100, 110, 1), normalized=False, c_crit=False, n=fixed(30))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Schemat postępowania"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Określenie badanej cechy, jej rozkładu i badanego parametru jej rozkładu\n",
    "- Określenie hipotezy zerowej $H_0$ i hipotezy alternatywnej 
$H_1$\n", "- Identyfikacja testu statystycznego i statystyki testowej\n", "- Wybór poziomu istotności $\\alpha$\n", "- Sformułowanie reguły decyzyjnej: określenie obszarów krytycznych i zasad odrzucenia hipotezy $H_0$\n", "- Pobranie próby losowej prostej. Obliczenie na podstawie próbki wartości statystyki testowej\n", "- Podjęcie decyzji" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Określenie badanej cechy, jej rozkładu i badanego parametru jej rozkładu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład: $X \\sim N(\\mu, 15)$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Określenie hipotezy zerowej i alternatywnej" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$H_0: \\theta = \\theta_0$\n", "
$H_1: \\theta > \\theta_0$\n", "\n", "---\n", "\n", "$H_0: \\theta = \\theta_0$\n", "
$H_1: \\theta < \\theta_0$\n", "\t\n", "--- \n", " \n", "$H_0: \\theta = \\theta_0$\n", "
$H_1: \\theta \\neq \\theta_0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład:\n", "
$\\mu_0 = 100$\n", "\t\t\n", "Układ hipotez:\n", "
$H_0: \\mu = 100$\n", "
$H_1: \\mu > 100$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Identyfikacja testu statystycznego i statystyki testowej" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład: zmienna o rozkładzie normalnym ze znaną wariancją:\n", "\n", "$Z=\\frac{\\bar{X}-\\mu_0}{\\frac{\\sigma}{\\sqrt{n}}}$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sformułowanie reguły decyzyjnej: określenie obszarów krytycznych i zasad odrzucenia hipotezy $H_0$" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "39d27f6851f24f969f8a5351362f34f5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='test_type', index=1, options=('left', 'right', 'two-sided'), value…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(critical_region, test_type=[\"left\", \"right\", \"two-sided\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$Z \\in C_{kr}: \\textrm{ Odrzucamy } H_0 \\textrm{ na rzecz } H_1$\n", "
$Z \\notin C_{kr}: \\textrm{ Brak podstaw do odrzucenia } H_0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Pobranie próby losowej prostej, obliczenie na podstawie próbki wartości statystyki testowej, podjęcie decyzji" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład: test prawostronny, $\\alpha=0.05$\n", "\n", "$C_{kr} = (1.644854, \\infty)$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dla $\\bar{x} = 101$:\n", "\n", "$Z = \\frac{101-100}{\\frac{15}{\\sqrt{30}}} = 0.3651484$\n", "\n", "$Z \\notin C_{kr}$\n", "\t\n", "$ \\textrm{ Brak podstaw do odrzucenia } H_0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dla $\\bar{x} = 106$:\n", "\n", "$Z = \\frac{106-100}{\\frac{15}{\\sqrt{30}}} = 2.19089$\n", "\n", "$Z \\in C_{kr}$\n", "\n", "$ \\textrm{ Odrzucamy } H_0 \\textrm{ na rzecz } H_1$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### p-wartość" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0cbbd7ac39e64fbea812ca2857f5a593", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=106, description='x_bar', max=110, min=100), Output()), _dom_classes=('w…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(p_value, x_bar=(100, 110, 1))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład:\n", "\n", "$\\alpha = 0.05$\n", "\n", "$P(\\bar{X}>106|\\mu=100)$\n", "\n", "$=P(Z>\\frac{106-100}{{\\frac{15}{\\sqrt{30}}}})$\n", "\n", "$=P(Z>2.19089) \\approx 0.014$\n", "\n", "$p < \\alpha$\n", "\n", "$ \\textrm{ Odrzucamy } H_0 \\textrm{ na rzecz } H_1$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Testowanie hipotez dla jednej zbiorowości (rozkład normalny ze znaną 
wariancją): podsumowanie" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$H_0: \\mu = \\mu_0$\n", "\n", "$H_1: \\mu > / \\neq / < \\mu_0$\n", "\n", "- założenia: rozkład normalny, odchylenie standardowe jest znane \n", "- statystyka:\n", "$$Z=\\frac{\\bar{X}-\\mu_0}{\\frac{\\sigma}{\\sqrt{n}}}$$\n", "- zbiór krytyczny, wartość krytyczna, zbiór przyjęć\n", "- odrzucenie $H_0$:\n", " - test prawostronny: $Z \\in (z_{kr},\\infty)$; $p \\leq \\alpha$\n", " - test lewostronny: $Z \\in (-\\infty, -z_{kr})$; $p \\leq \\alpha$\n", " - test dwustronny: $Z \\in (-\\infty, -z_{kr}) \\cup (z_{kr},\\infty)$; $2p \\leq \\alpha$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Decyzje względem $H_0$ i rodzaje błędów" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "| | Odrzucamy $H_0$ | Nie odrzucamy $H_0$ |\n", "| --- | --- | --- |\n", "| $H_0$ prawdziwa | Błąd I rodzaju ($\\alpha$) | Decyzja właściwa |\n", "| $H_0$ fałszywa | Decyzja właściwa ($1-\\beta$) | Błąd II rodzaju ($\\beta$) |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- $\\alpha$ poziom istotności testu\n", "- $1-\\beta$ moc testu \n", "- $\\alpha \\downarrow \\rightarrow \\beta \\uparrow$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![błędy](https://effectsizefaq.files.wordpress.com/2010/05/type-i-and-type-ii-errors.jpg)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](https://effectsizefaq.files.wordpress.com/2010/05/type-i-and-type-ii-errors.jpg)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c2e139ee39bd4326936f7a734ae0c240", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(FloatSlider(value=0.05, description='alpha', max=0.25, min=0.01, step=0.01), IntSlider(v…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ "interact(alpha_beta, alpha=(0.01, 0.25, 0.01), mu2=(100, 110, 1), n=(30, 60, 5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test w rozkładzie dwupunktowym" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Hipoteza zerowa:\n", "\n", "$H_0: p = p_0$\n", "\n", "- Hipoteza alternatywna, np.:\n", "\n", "$H_1: p < p_0 $\n", "\n", "- Statystyka\n", "\n", "$ S_n = \\sum_{i=1}^{n}X_i \\sim B_n(p) $" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Statystyka testowa – standaryzacja" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$ S_n = \\sum_{i=1}^{n}X_i \\sim B_n(p) $\n", "\n", "$E[S_n] = n \\cdot p_0$\n", "\n", "$D^2[S_n] = n \\cdot p_0 \\cdot(1-p_0)$\n", "\n", "$D[S_n] = \\sqrt{n \\cdot p_0 \\cdot(1-p_0)}$\n", "\n", "$Z = \\frac{S_n - E[S_n]}{D[S_n]} = \\frac{S_n - n \\cdot p_0}{\\sqrt{n \\cdot p_0 \\cdot (1 - p_0)}}$\n", "\n", "- dla $n \\cdot p \\geq 5$ i $n \\cdot (1 - p) \\geq 5$:\n", "\n", "$ Z \\sim N(0,1) $" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### lub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Statystyka\n", "\n", "$ \\bar{X} = \\frac{\\textrm{liczba sukcesów}}{n} $\n", "\n", "- Standaryzacja\n", "\n", "$E[\\bar{X}] = p_0$\n", "\n", "$D[\\bar{X}] = \\sqrt{\\frac{{p_0(1-p_0)}}{n}}$\n", "\n", "$Z = \\frac{\\bar{X}-E[\\bar{X}]}{D[\\bar{X}]}=\\frac{\\bar{X}-p_0}{\\sqrt{\\frac{{p_0(1-p_0)}}{n}}}$" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, 
"eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false } }, "nbformat": 4, "nbformat_minor": 4 }