{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from ipywidgets import *\n", "import matplotlib.pyplot as plt\n", "from IPython.display import set_matplotlib_formats\n", "set_matplotlib_formats('svg')\n", "import numpy as np\n", "import scipy.stats as stats" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
    "def target(ax):\n",
    "    \"\"\"Draw a bullseye on ``ax``: four concentric discs and dashed crosshairs.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ax : matplotlib.axes.Axes\n",
    "        Axes to draw on. Its limits are set to [-1, 1] on both axes and the\n",
    "        axis decorations are switched off.\n",
    "    \"\"\"\n",
    "    # The discs are semi-transparent and overlap, so the centre ends up darkest.\n",
    "    for radius in (1, 0.75, 0.5, 0.25):\n",
    "        ax.add_artist(plt.Circle((0, 0), radius, alpha=0.5, zorder=0))\n",
    "    # Crosshairs sit above the discs (zorder=1) and below the points (zorder=2).\n",
    "    ax.axhline(y=0, color='y', linestyle='--', zorder=1)\n",
    "    ax.axvline(x=0, color='y', linestyle='--', zorder=1)\n",
    "    ax.set_xlim(-1, 1)\n",
    "    ax.set_ylim(-1, 1)\n",
    "    ax.axis('off')\n",
    "\n",
    "\n",
    "def double_target():\n",
    "    \"\"\"Create a 10x5 inch figure holding two targets side by side.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        The two Axes (left, right), each with a target already drawn.\n",
    "    \"\"\"\n",
    "    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))\n",
    "    for ax in axes:\n",
    "        target(ax)\n",
    "    return axes\n",
    "\n",
    "\n",
    "def add_points(mu, sigma, n, ax):\n",
    "    \"\"\"Scatter ``n`` random hits on ``ax`` as red crosses.\n",
    "\n",
    "    Both coordinates of every hit are drawn independently from a normal\n",
    "    distribution with mean ``mu`` and standard deviation ``sigma``.\n",
    "    \"\"\"\n",
    "    points = np.random.normal(mu, sigma, (2, n))  # row 0: x, row 1: y\n",
    "    ax.scatter(*points, c='r', zorder=2, marker='x')\n",
    "\n",
    "\n",
    "def estimate(mu_left=0, mu_right=0, sigma_left=0.25, sigma_right=0.25, n_left=100, n_right=100):\n",
    "    \"\"\"Draw two targets and scatter an independent sample of hits on each.\n",
    "\n",
    "    Meant to be driven by ``interact`` so that two estimators can be compared\n",
    "    visually by varying the mean, the spread or the sample size on one side.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu_left, mu_right : float\n",
    "        Mean of both hit coordinates on the left / right target.\n",
    "    sigma_left, sigma_right : float\n",
    "        Standard deviation of the hit coordinates on each target.\n",
    "    n_left, n_right : int\n",
    "        Number of hits drawn on each target.\n",
    "    \"\"\"\n",
    "    axes = double_target()\n",
    "    settings = zip([mu_left, mu_right], [sigma_left, sigma_right], [n_left, n_right], axes)\n",
    "    for mu, sigma, n, ax in settings:\n",
    "        add_points(mu, sigma, n, ax)\n",
    "\n",
    "\n",
    "def conf_int(n=10, alpha=0.05):\n",
    "    \"\"\"Simulate 100 confidence intervals for the mean of N(0, 1), variance known.\n",
    "\n",
    "    Every interval comes from a fresh sample of size ``n`` and is computed as\n",
    "    ``x_bar +/- z * sigma / sqrt(n)``, where ``z`` is the ``1 - alpha/2``\n",
    "    quantile of the standard normal distribution. Intervals covering the true\n",
    "    mean are drawn in blue, the others in orange; the figure title is the\n",
    "    fraction of intervals that cover the true mean.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n : int\n",
    "        Size of each sample.\n",
    "    alpha : float\n",
    "        Significance level; the nominal confidence level is ``1 - alpha``.\n",
    "    \"\"\"\n",
    "    # Kept as locals, not parameters: interact() would add a slider for each\n",
    "    # extra defaulted parameter.\n",
    "    mu = 0\n",
    "    sigma = 1\n",
    "    ints_num = 100\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    points = np.random.normal(mu, sigma, (ints_num, n))  # one sample per row\n",
    "    xs = np.mean(points, axis=1)\n",
    "\n",
    "    # z has to be a quantile of the *standard* normal: sigma is already applied\n",
    "    # in the half-width below, so passing mu/sigma to ppf would shift and\n",
    "    # rescale the interval a second time for any mu != 0 or sigma != 1.\n",
    "    z = stats.norm.ppf(1 - alpha / 2)\n",
    "    delta = z * sigma / np.sqrt(n)\n",
    "    left = xs - delta\n",
    "    right = xs + delta\n",
    "\n",
    "    covered = (left <= mu) & (mu <= right)\n",
    "    colors = ['#1f77b4' if hit else '#ff7f50' for hit in covered]\n",
    "\n",
    "    y = np.linspace(0, 100, ints_num)  # horizontal position of each interval\n",
    "    ax.vlines(y, left, right, color=colors)\n",
    "    ax.plot(y, xs, '.')\n",
    "    ax.axhline(y=mu, color='gray', linestyle='--')\n",
    "    ax.set_ylim([-1.5, 1.5])\n",
    "    ax.set_title(covered.mean())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Estymatory" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pojęcia" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- __Próba prosta__ $\\boldsymbol{X} = (X_1, X_2, ..., X_n)$ niezależne i o tym samym rozkładzie\n", "- __Statystyka__ funkcja próby losowej prostej: $T(\\boldsymbol{X})=T(X_1, X_2, ..., X_n)$\n", "- __Estymator__ statystyka $\\hat{\\theta}(\\boldsymbol{X})$ będąca oszacowaniem nieznanego parametru populacji $\\theta$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Własności estymatorów" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- nieobciążoność\n", "- efektywność\n", "- zgodność \n", "- ..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estymator nieobciążony" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Estymator $\\hat{\\theta}$ parametru $\\theta$ jest nieobciążony jeżeli:\n", "\t$$E[\\hat{\\theta}] = \\theta$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przykład:\n", "\n", "$X_1, X_2, ..., X_n$ - niezależne, o takim samym rozkładzie:\n", "\n", "$E[X_i]=\\mu$\n", "\n", "$D^2[X_i]=\\sigma^2$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$E[\\bar{X}] = $\n", "\n", "$E[X_1] = $" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6cb82d3c720d4603993402c97747a75e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(FloatSlider(value=0.0, description='mu_right', max=0.4, min=-0.4, step=0.05), Output()),…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(estimate, mu_left=fixed(0), mu_right=(-0.4, 0.4, 0.05), 
sigma_left=fixed(0.25), sigma_right=fixed(0.25), n_left=fixed(100), n_right=fixed(100))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Obciążenie estymatora wariancji $\\frac{1}{n}\\sum_{i=1}^{n}(X_i-\\bar{X})^2$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$E[S^2] = E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\bar{X})^2]=E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu+\\mu-\\bar{X})^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n((X_i-\\mu)-(\\bar{X}-\\mu))^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n((X_i-\\mu)^2-2(\\bar{X}-\\mu)(X_i-\\mu)+(\\bar{X}-\\mu)^2)]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)+\\frac{1}{n}\\sum_{i=1}^n(\\bar{X}-\\mu)^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)(\\frac{1}{n}\\sum_{i=1}^nX_i-\\frac{1}{n}\\sum_{i=1}^n\\mu)+\\frac{1}{n}n(\\bar{X}-\\mu)^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)(\\bar{X}-\\mu)+(\\bar{X}-\\mu)^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)^2+(\\bar{X}-\\mu)^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-(\\bar{X}-\\mu)^2]$\n", "\n", "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2]-E[(\\bar{X}-\\mu)^2]$\n", "\n", "$= \\frac{1}{n}\\sum_{i=1}^n \\underbrace{E[(X_i-\\mu)^2]}_{\\sigma^2}-\\underbrace{E[(\\bar{X}-\\mu)^2]}_{\\frac{\\sigma^2}{n}}$\n", "\n", "$= \\frac{n\\sigma^2}{n} - \\frac{\\sigma^2}{n}$\n", "\n", "$= \\frac{n\\sigma^2-\\sigma^2}{n} = \\frac{(n-1)\\sigma^2}{n}$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estymator efektywny" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Estymator nieobciążony $\\hat{\\theta}_1$ jest efektywniejszy od nieobciążonego estymatora $\\hat{\\theta}_2$, jeżeli zachodzi:\n", "\n", "$$ \\forall \\theta \\;\\;\\; D^2[\\hat{\\theta}_1] \\leq D^2[\\hat{\\theta}_2] $$\n", "\n", "Estymator nieobciążony parametru $\\theta$ nazywamy efektywnym (najefektywniejszym), gdy jest 
efektywniejszy od wszystkich estymatorów nieobciążonych tego parametru." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8490a4efc04b47778434ff04fce80408", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(FloatSlider(value=0.25, description='sigma_right', max=1.0, min=0.1, step=0.05), Output(…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(estimate, mu_left=fixed(0), mu_right=fixed(0), sigma_left=fixed(0.1), sigma_right=(0.1, 1, 0.05), n_left=fixed(100), n_right=fixed(100))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estymator zgodny" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Estymator $\\hat{\\theta}$ parametru $\\theta$ jest zgodny jeżeli:\n", "\n", "$$\\forall \\boldsymbol{X},\\theta, \\epsilon > 0 \\;\\;\\; \\lim\\limits_{n\\rightarrow\\infty} P(|\\hat{\\theta}_n(\\boldsymbol{X})-\\theta| < \\epsilon) = 1$$" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "397022a7717349c3afd7d0dbefeb0f9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=100, description='n_right', max=500, min=5, step=10), Output()), _dom_cl…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(estimate, mu_left=fixed(0), mu_right=fixed(0), sigma_left=fixed(0.1), sigma_right=fixed(0.1), n_left=fixed(100), n_right=(5, 500, 10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estymatory przedziałowe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- $\\hat{\\theta}$: 
estymator punktowy\n", "- $\\hat{\\theta}_L = \\hat{\\theta} - \\Delta$\n", "- $\\hat{\\theta}_P = \\hat{\\theta} + \\Delta$\n", "- $1 - \\alpha$: poziom ufności\n", "- P($\\hat{\\theta}_L \\leq \\theta \\leq \\hat{\\theta}_P$) = $1 - \\alpha$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Estymatory przedziałowe: rozkład normalny ze znaną wariancją" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Przedział ufności dla średniej:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$1 - \\alpha = P(\\bar{X}-\\Delta \\leq \\mu \\leq \\bar{X}+\\Delta)$\n", "\n", "$= P(-\\Delta \\leq \\bar{X} - \\mu \\leq \\Delta)$\n", "\n", "$= P(-\\frac{\\Delta}{\\sigma}\\sqrt{n} \\leq \\frac{\\bar{X} - \\mu}{\\sigma/\\sqrt{n}} \\leq \\frac{\\Delta}{\\sigma}\\sqrt{n})$\n", "\n", "$=\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n})-\\Phi(-\\frac{\\Delta}{\\sigma}\\sqrt{n})$\n", "\n", "$=2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) - 1$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) - 1=1-\\alpha$\n", "\n", "$2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) = 2-\\alpha$\n", "\n", "$\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n})=1-\\frac{\\alpha}{2}$\n", "\n", "$\\frac{\\Delta}{\\sigma}\\sqrt{n} = \\underbrace{\\Phi^{-1}(1-\\frac{\\alpha}{2})}_{z_{\\alpha}}$\n", "\n", "$\\Delta = z_\\alpha\\frac{\\sigma}{\\sqrt{n}}$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$\\textrm{estymator przedziałowy:}\\;\\; (\\bar{X}-z_\\alpha\\frac{\\sigma}{\\sqrt{n}}, \\bar{X}+z_\\alpha\\frac{\\sigma}{\\sqrt{n}})$$" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "776daa8f3b1b427bbf104f507226c6f9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=10, description='n', max=1000, min=10, step=100), FloatSlider(value=0.05…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "interact(conf_int, n=(10, 1000, 100), alpha=(0.01, 0.5, 0.05))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Przykład" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- $X\\sim N(\\mu, \\sigma)$\n", "- $\\sigma^2=81$\n", "- $n = 36$\n", "- $\\bar{x}=175$\n", "- $1-\\alpha=0,95$" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false } }, "nbformat": 4, "nbformat_minor": 4 }