{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from ipywidgets import *\n", "import matplotlib.pyplot as plt\n", "from IPython.display import set_matplotlib_formats\n", "set_matplotlib_formats('svg')\n", "import numpy as np\n", "import scipy.stats as stats" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def spearman_pearson(example=\"1\", show=False):\n", "    \"\"\"Draw a scatter plot contrasting the Pearson and Spearman coefficients.\n", "\n", "    example selects the data set (50 points, x evenly spaced on [0, 1]):\n", "      \"1\" (or any unrecognised value): y = x\n", "      \"2\": y = x**5\n", "      \"3\": y = x + N(0, 0.2) noise, then the last five x values are moved to [2, 3]\n", "      \"4\": y = x + N(0, 1) noise\n", "    show: when True, both coefficients (rounded to 2 decimals) go in the title.\n", "    \"\"\"\n", "    # Fixed seed: interact() re-runs this function on every widget change, so\n", "    # without it ticking `show` would redraw different noise and the revealed\n", "    # coefficients would describe a different scatter than the one just seen.\n", "    rng = np.random.RandomState(0)\n", "    fig, ax = plt.subplots(figsize=(5, 5))\n", "    n = 50\n", "    x = np.linspace(0, 1, n)\n", "    y = x.copy()  # independent copy, so later edits to x never leak into y\n", "    if example == \"2\":\n", "        y = x**5\n", "    elif example == \"3\":\n", "        y = x + rng.normal(0, 0.2, n)\n", "        # Outliers in x only: y was already computed from the original x values.\n", "        x[-5:] = np.linspace(2, 3, 5)\n", "    elif example == \"4\":\n", "        y = x + rng.normal(0, 1, n)\n", "    ax.scatter(x, y)\n", "    ax.set_xlabel(\"x\")\n", "    ax.set_ylabel(\"y\")\n", "    r = round(stats.pearsonr(x, y)[0], 2)\n", "    rs = round(stats.spearmanr(x, y)[0], 2)\n", "    if show:\n", "        ax.set_title(r\"$r_{Pearson}=\" + str(r) + \"$ $r_{Spearman}=\" + str(rs) + \"$\")\n", "    ax.grid()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Testy nieparametryczne" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test znaków" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Odpowiednik sparowanego testu t dla dwóch populacji\n", "- Założenia:\n", "\t- próba losowa prosta\n", "\t- pary $(X,Y)$ niezależne\n", "- Skala co najmniej porządkowa\n", "- Układ hipotez:\n", "\n", "$\\;\\;\\;\\;\\;H_0: p=P(Y>X)=0.5$\n", "\n", "$\\;\\;\\;\\;\\;H_1: p \\neq 0.5 | p > 0.5 | p < 0.5$\n", "\n", "- Odrzucenie X=Y\n", "- T: liczba znaków(+)\n", "- $T_{H_0} \\sim B_n(0.5,n)$\n", "- Dla $np_0=n(1-p_0)>5$:\n", "\n", "$$Z=\\frac{T-np_0}{\\sqrt{np_0(1-p_0)}} \\sim N(0,1)$$\n", "\n", "$$Z =\\frac{2T-n}{\\sqrt{n}} \\sim N(0,1)$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test znaków - przykład" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "| lp | test 1 
| test 2 | różnica | znak |\n", "| --- | --- | --- | --- | --- |\n", "| 1 | 1.00 | 10.00 | 9.00 | $+$ |\n", "| 2 | 4.00 | 6.00 | 2.00 | $+$ |\n", "| 3 | 2.00 | 8.00 | 6.00 | $+$ |\n", "| 4 | 3.00 | 9.00 | 6.00 | $+$ |\n", "| 5 | 0.00 | 0.00 | \\textbf{0.00} |\n", "| 6 | 5.00 | 9.00 | 4.00 | $+$ |\n", "| 7 | 10.00 | 7.00 | -3.00 | $-$|\n", "| 8 | 9.00 | 5.00 | -4.00 | $-$|\n", "| 9 | 8.00 | 7.00 | -1.00 | $-$|\n", "| 10 | 8.00 | 4.00 | -4.00 | $-$|\n", "| 11 | 2.00 | 5.00 | 3.00 | $+$|\n", "| 12 | 3.00 | 5.00 | 2.00 | $+$|\n", "| 13 | 6.00 | 4.00 | -2.00 | $-$|\n", "| 14 | 5.00 | 7.00 | 2.00 | $+$|\n", "| 15 | 8.00 | 8.00 | \\textbf{0.00} |\n", "| 16 | 1.00 | 9.00 | 8.00 | $+$|" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$\\alpha = 0.05$\n", "\n", "$H_0: p=0.5$\n", "
$H_1: p \\neq 0.5$\n", "\n", "$n=14$\n", "\n", "$S_n=T=9$\n", "\n", "$Z=\\frac{2T-n}{\\sqrt{n}}=\\frac{2\\cdot9-14}{\\sqrt{14}}=1.069$\n", "\n", "$\\textrm{Zbiór krytyczny: }(-\\infty, -1.96)\\cup(1.96,\\infty)$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test Wilcoxona" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Odpowiednik sparowanego testu t dla dwóch populacji\n", "- Założenia:\n", "\t- rozkład różnic symetryczny\n", "\t- różnice są niezależne\n", "- Układ hipotez:\n", "\n", "$\\;\\;\\;\\;\\;H_0: median(Y-X)=0$\n", "\n", "$\\;\\;\\;\\;\\;H_1: median(Y-X)\\neq0$\n", "\n", "- Rangowanie wartości bezwzględnych różnic\n", "\n", "- Dla równych różnic średnia arytmetyczna rang\n", "\n", "- Statystyka $T=min[\\sum(+),\\sum(-)]$\n", "\n", "- Zbiór krytyczny: $C_{kr}=[0, T_{kr}]$\n", "\n", "- Dla dużych prób:\n", "\n", "$\\;\\;\\;\\;\\;\\mu_T = \\frac{n(n+1)}{4}$\n", "\n", "$\\;\\;\\;\\;\\;\\sigma_T = \\sqrt{\\frac{n(n+1)(2n+1)}{24}}$\n", "\n", "$\\;\\;\\;\\;\\;Z = \\frac{T-\\mu_T}{\\sigma_T}$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test Wilcoxona - przykład" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "| lp | test 1 | test 2 | różnica | moduł różnicy | ranga |\n", "| --- | --- | --- | --- | --- | --- |\n", "| 1 | 1.00 | 10.00 | 9.00 | 9.00 | |\n", "| 2 | 4.00 | 6.00 | 2.00 | 2.00 ||\n", "| 3 | 2.00 | 8.00 | 6.00 | 6.00 ||\n", "| 4 | 3.00 | 9.00 | 6.00 | 6.00 ||\n", "| 5 | 0.00 | 0.00 | 0.00 | 0.00 | |\n", "| 6 | 5.00 | 9.00 | 4.00 | 4.00 ||\n", "| 7 | 10.00 | 7.00 | -3.00 | 3.00 ||\n", "| 8 | 9.00 | 5.00 | -4.00 | 4.00 ||\n", "| 9 | 8.00 | 7.00 | -1.00 | 1.00 ||\n", "| 10 | 8.00 | 4.00 | -4.00 |4.00||\n", "| 11 | 2.00 | 5.00 | 3.00 | 3.00||\n", "| 12 | 3.00 | 5.00 | 2.00 | 2.00||\n", "| 13 | 6.00 | 4.00 | -2.00 | 2.00||\n", "| 14 | 5.00 | 7.00 | 2.00 | 2.00||\n", "| 15 | 8.00 | 8.00 | 0.00 | 0.00 ||\n", "| 16 | 1.00 | 9.00 | 8.00 | 8.00|| " ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "| lp | test 1 | test 2 | różnica | moduł różnicy | ranga |\n", "| --- | --- | --- | --- | --- | --- |\n", "| 1 | 1.00 | 10.00 | 9.00 | 9.00 | \\textbf{14.00}|\n", "| 2 | 4.00 | 6.00 | 2.00 | 2.00 | \\textbf{3.50}|\n", "| 3 | 2.00 | 8.00 | 6.00 | 6.00 | \\textbf{11.50}|\n", "| 4 | 3.00 | 9.00 | 6.00 | 6.00 | \\textbf{11.50}|\n", "| 5 | 0.00 | 0.00 | 0.00 | 0.00 | $-$|\n", "| 6 | 5.00 | 9.00 | 4.00 | 4.00 | \\textbf{9.00}|\n", "| 7 | 10.00 | 7.00 | -3.00 | 3.00 | 6.50|\n", "| 8 | 9.00 | 5.00 | -4.00 | 4.00 | 9.00|\n", "| 9 | 8.00 | 7.00 | -1.00 | 1.00 | 1.00|\n", "| 10 | 8.00 | 4.00 | -4.00 |4.00| 9.00|\n", "| 11 | 2.00 | 5.00 | 3.00 | 3.00| \\textbf{6.50}|\n", "| 12 | 3.00 | 5.00 | 2.00 | 2.00| \\textbf{3.50}|\n", "| 13 | 6.00 | 4.00 | -2.00 | 2.00| 3.50|\n", "| 14 | 5.00 | 7.00 | 2.00 | 2.00| \\textbf{3.50}|\n", "| 15 | 8.00 | 8.00 | 0.00 | 0.00 | $-$|\n", "| 16 | 1.00 | 9.00 | 8.00 | 8.00| \\textbf{13.00}|" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$\\alpha = 0.05$\n", "\n", "$H_0: median(Y-X)=0$\n", "\n", "$H_1: median(Y-X)\\neq0$\n", "\n", "$\\sum(-) = 6.5 + 9 + 1 + 9 + 3.5 = 29$\n", "\n", "$\\sum(+) = 14 + 3.5 + 11.5 + 11.5 + 9 + 6.5 + 3.5 + 3.5 + 13 = 76$\n", "\n", "$T=min[\\sum(+),\\sum(-)] = 29$\n", "\n", "$\\textrm{Zbiór krytyczny: }[0,26]$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Współczynnik korelacji Spearmana" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Współczynnik korelacji Pearsona dla rang obserwacji\n", "- Jeśli brak rang wiązanych:\n", "\n", "$$r_s=1-\\frac{6\\cdot\\sum_{i=1}^{n}d_i^2}{n\\cdot(n^2-1)}$$\n", "\n", "- Test istotności:\n", "\n", "$\\;\\;\\;\\;\\;H_0: \\rho_s=0$\n", "
$\\;\\;\\;\\;\\;H_1: \\rho_s\\neq0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Przykład" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "|X|-4|8|9|\n", "|--|--|--|--|\n", "|Y|10|2|1|" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "|X |-4|8|9|\n", "|--|--|--|--|\n", "|Ranga| | | |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "|Y |10|2|1|\n", "|--|--|--|--|\n", "|Ranga| | | |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$\\bar{x} = $\n", "\n", "$\\bar{y} = $" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$r = \\frac{\\sum_{i=1}^{n}(x_i-\\bar{x})(y_i-\\bar{y})}{\\sqrt{\\sum_{i=1}^{n}(x_i-\\bar{x})^2}\\sqrt{\\sum_{i=1}^{n}(y_i-\\bar{y})^2}}$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Współczynnik korelacji Pearsona a Spearmana" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "340c3d9062804660a1cd088e39ed75a7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='example', options=('1', '2', '3', '4'), value='1'), Checkbox(value…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Dropdown over the four examples; `show` becomes a checkbox from its bool default.\n", "# The trailing semicolon keeps the value returned by interact() out of the cell output.\n", "interact(spearman_pearson, example=[\"1\", \"2\", \"3\", \"4\"]);" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, 
"eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false } }, "nbformat": 4, "nbformat_minor": 4 }