{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "from ipywidgets import *\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import set_matplotlib_formats\n",
    "set_matplotlib_formats('svg')\n",
    "import numpy as np\n",
    "import scipy.stats as stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def target(ax):\n",
    "    for r in [1, 0.75, 0.5, 0.25]:\n",
    "        circle = plt.Circle((0, 0), r, alpha=0.5, zorder=0)\n",
    "        ax.add_artist(circle)\n",
    "    ax.axhline(y=0, color='y', linestyle='--', zorder=1)\n",
    "    ax.axvline(x=0, color='y', linestyle='--', zorder=1)\n",
    "    ax.set_xlim(-1,1)\n",
    "    ax.set_ylim(-1,1)\n",
    "    ax.axis('off')\n",
    "    \n",
    "def double_target():\n",
    "    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,5))\n",
    "    target(ax[0])\n",
    "    target(ax[1])\n",
    "    return ax\n",
    "\n",
    "def add_points(mu, sigma, n, ax):\n",
    "    points = np.random.normal(mu, sigma, (2, n))\n",
    "    ax.scatter(*points, c='r', zorder=2, marker='x')\n",
    "\n",
    "def estimate(mu_left=0, mu_right=0, sigma_left=0.25, sigma_right=0.25, n_left=100, n_right=100):\n",
    "    axes = double_target()\n",
    "    for mu, sigma, n, ax in zip([mu_left, mu_right], [sigma_left, sigma_right], [n_left, n_right], axes):\n",
    "        add_points(mu, sigma, n, ax) \n",
    "        \n",
    "def conf_int(n=10, alpha=0.05):\n",
    "    plt.subplots(figsize=(10,5))\n",
    "    mu = 0\n",
    "    sigma = 1\n",
    "    ints_num = 100\n",
    "    points = np.random.normal(mu, sigma, (ints_num, n))\n",
    "    xs = np.mean(points, axis=1)\n",
    "    z = stats.norm.ppf(1-alpha/2, mu, sigma)\n",
    "    left = xs - z*sigma/np.sqrt(n)\n",
    "    right = xs + z*sigma/np.sqrt(n)\n",
    "    y = np.linspace(0, 100, ints_num)\n",
    "    in_count = np.mean([1 if l <= mu <= r else 0 for l,r in zip(left, right)])\n",
    "    colors = ['#1f77b4' if l <= mu <= r else '#ff7f50' for l,r in zip(left, right)]\n",
    "    plt.vlines(y, left, right, color=colors)\n",
    "    plt.plot(y, xs, '.')\n",
    "    plt.axhline(y=mu, color='gray', linestyle='--')\n",
    "    plt.ylim([-1.5, 1.5])\n",
    "    plt.title(in_count)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Estymatory"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pojęcia"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- __Próba prosta__ $\\boldsymbol{X} = (X_1, X_2, ..., X_n)$ niezależne i o tym samym rozkładzie\n",
    "- __Statystyka__ funkcja próby losowej prostej: $T(\\boldsymbol{X})=T(X_1, X_2, ..., X_n)$\n",
    "- __Estymator__ statystyka $\\hat{\\theta}(\\boldsymbol{X})$ będąca oszacowaniem nieznanego parametru populacji $\\theta$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Własności estymatorów"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- nieobciążoność\n",
    "- efektywność\n",
    "- zgodność \n",
    "- ..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estymator nieobciążony"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Estymator $\\hat{\\theta}$ parametru $\\theta$ jest nieobciążony jeżeli:\n",
    "\t$$E[\\hat{\\theta}] = \\theta$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Przykład:\n",
    "\n",
    "$X_1, X_2, ..., X_n$ - niezależne, o takim samym rozkładzie:\n",
    "\n",
    "$E[X_i]=\\mu$\n",
    "\n",
    "$D^2[X_i]=\\sigma^2$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$E[\\bar{X}] = $\n",
    "\n",
    "$E[X_1] = $"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6cb82d3c720d4603993402c97747a75e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(FloatSlider(value=0.0, description='mu_right', max=0.4, min=-0.4, step=0.05), Output()),…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.estimate(mu_left=0, mu_right=0, sigma_left=0.25, sigma_right=0.25, n_left=100, n_right=100)>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(estimate, mu_left=fixed(0), mu_right=(-0.4, 0.4, 0.05), sigma_left=fixed(0.25), sigma_right=fixed(0.25), n_left=fixed(100), n_right=fixed(100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Obciążenie estymatora wariancji $\\frac{1}{n}\\sum_{i=1}^{n}(X_i-\\bar{X})^2$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$E[S^2] = E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\bar{X})^2]=E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu+\\mu-\\bar{X})^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n((X_i-\\mu)-(\\bar{X}-\\mu))^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n((X_i-\\mu)^2-2(\\bar{X}-\\mu)(X_i-\\mu)+(\\bar{X}-\\mu)^2)]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^nX_i-\\mu)^2-2(\\bar{X}-\\mu)\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)+\\frac{1}{n}\\sum_{i=1}^n(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)(\\frac{1}{n}\\sum_{i=1}^nX_i-\\frac{1}{n}\\sum_{i=1}^n\\mu)+\\frac{1}{n}n(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)(\\bar{X}-\\mu)+(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-2(\\bar{X}-\\mu)^2+(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2-(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= E[\\frac{1}{n}\\sum_{i=1}^n(X_i-\\mu)^2]-E[(\\bar{X}-\\mu)^2]$\n",
    "\n",
    "$= \\frac{1}{n}\\sum_{i=1}^n \\underbrace{E[(X_i-\\mu)^2]}_{\\sigma^2}-\\underbrace{E[(\\bar{X}-\\mu)^2]}_{\\frac{\\sigma^2}{n}}$\n",
    "\n",
    "$= \\frac{n\\sigma^2}{n} - \\frac{\\sigma^2}{n}$\n",
    "\n",
    "$= \\frac{n\\sigma^2-\\sigma^2}{n} = \\frac{(n-1)\\sigma^2}{n}$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estymator efektywny"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Estymator nieobciążony $\\hat{\\theta}_1$ jest efektywniejszy od nieobciążonego estymatora $\\hat{\\theta}_2$, jeżeli zachodzi:\n",
    "\n",
    "$$ \\forall \\theta \\;\\;\\;  D^2[\\hat{\\theta}_1] \\leq D^2[\\hat{\\theta}_2] $$\n",
    "\n",
    "Estymator nieobciążony parametru $\\theta$ nazywamy efektywnym (najefektywniejszym), gdy jest efektywniejszy od wszystkich estymatorów nieobciążonych tego parametru."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8490a4efc04b47778434ff04fce80408",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(FloatSlider(value=0.25, description='sigma_right', max=1.0, min=0.1, step=0.05), Output(…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.estimate(mu_left=0, mu_right=0, sigma_left=0.25, sigma_right=0.25, n_left=100, n_right=100)>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(estimate, mu_left=fixed(0), mu_right=fixed(0), sigma_left=fixed(0.1), sigma_right=(0.1, 1, 0.05), n_left=fixed(100), n_right=fixed(100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estymator zgodny"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Estymator $\\hat{\\theta}$ parametru $\\theta$ jest zgodny jeżeli:\n",
    "\n",
    "$$\\forall \\boldsymbol{X},\\theta, \\epsilon > 0 \\;\\;\\; \\lim\\limits_{n\\rightarrow\\infty} P(|\\hat{\\theta}_n(\\boldsymbol{X})-\\theta| < \\epsilon) = 1$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "397022a7717349c3afd7d0dbefeb0f9e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(IntSlider(value=100, description='n_right', max=500, min=5, step=10), Output()), _dom_cl…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.estimate(mu_left=0, mu_right=0, sigma_left=0.25, sigma_right=0.25, n_left=100, n_right=100)>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(estimate, mu_left=fixed(0), mu_right=fixed(0), sigma_left=fixed(0.1), sigma_right=fixed(0.1), n_left=fixed(100), n_right=(5, 500, 10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estymatory przedziałowe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- $\\hat{\\theta}$: estymator punktowy\n",
    "- $\\hat{\\theta}_L = \\hat{\\theta} - \\Delta$\n",
    " <br>$\\hat{\\theta}_P = \\hat{\\theta} + \\Delta$\n",
    "- $1 - \\alpha$: poziom ufności\n",
    "- P($\\hat{\\theta}_L \\leq \\theta \\leq \\hat{\\theta}_P$) = $1 - \\alpha$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estymatory przedziałowe: rozkład normalny ze znaną wariancją"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Przedział ufności dla średniej:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$1 - \\alpha = P(\\bar{X}-\\Delta \\leq \\mu \\leq \\bar{X}+\\Delta)$\n",
    "\n",
    "$= P(-\\Delta \\leq \\bar{X} - \\mu \\leq \\Delta)$\n",
    "\n",
    "$= P(-\\frac{\\Delta}{\\sigma}\\sqrt{n} \\leq \\frac{\\bar{X} - \\mu}{\\sigma/\\sqrt{n}} \\leq \\frac{\\Delta}{\\sigma}\\sqrt{n})$\n",
    "\n",
    "$=\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n})-\\Phi(-\\frac{\\Delta}{\\sigma}\\sqrt{n})$\n",
    "\n",
    "$=2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) - 1$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) - 1=1-\\alpha$\n",
    "\n",
    "$2\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n}) = 2-\\alpha$\n",
    "\n",
    "$\\Phi(\\frac{\\Delta}{\\sigma}\\sqrt{n})=1-\\frac{\\alpha}{2}$\n",
    "\n",
    "$\\frac{\\Delta}{\\sigma}\\sqrt{n} = \\underbrace{\\Phi^{-1}(1-\\frac{\\alpha}{2})}_{z_{\\alpha}}$\n",
    "\n",
    "$\\Delta = z_\\alpha\\frac{\\sigma}{\\sqrt{n}}$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$$\\textrm{estymator przedziałowy:}\\;\\; (\\bar{X}-z_\\alpha\\frac{\\sigma}{\\sqrt{n}}, \\bar{X}+z_\\alpha\\frac{\\sigma}{\\sqrt{n}})$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "776daa8f3b1b427bbf104f507226c6f9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(IntSlider(value=10, description='n', max=1000, min=10, step=100), FloatSlider(value=0.05…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.conf_int(n=10, alpha=0.05)>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(conf_int, n=(10, 1000, 100), alpha=(0.01, 0.5, 0.05))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Przykład"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- $X\\sim N(\\mu, \\sigma)$\n",
    "- $\\sigma^2=81$\n",
    "- $n = 36$\n",
    "- $\\bar{x}=175$\n",
    "- $1-\\alpha=0,95$"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}