{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "from ipywidgets import *\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy.stats as stats\n",
    "from matplotlib.lines import Line2D\n",
    "plt.rcParams[\"figure.figsize\"] = (20,10)\n",
    "plt.rcParams.update({'font.size': 22})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def describe(data):\n",
    "    \"\"\"Print summary statistics of ``data``.\n",
    "\n",
    "    ``data`` is wrapped in a ``pandas.DataFrame`` and the table produced by\n",
    "    ``DataFrame.describe()`` is printed (nothing is returned).\n",
    "    \"\"\"\n",
    "    print(pd.DataFrame(data).describe())\n",
    "\n",
    "\n",
    "def histogram(data, a=1, b=0):\n",
    "    \"\"\"Plot a histogram of the linearly transformed sample ``data*a + b``.\n",
    "\n",
    "    Args:\n",
    "        data: sample values; must support ``data*a + b`` element-wise\n",
    "            (the notebook passes a NumPy array).\n",
    "        a: scale factor applied to every observation.\n",
    "        b: shift added to every observation.\n",
    "\n",
    "    Side effects:\n",
    "        Draws on the current matplotlib figure and prints the summary\n",
    "        statistics of the transformed sample via ``describe``.\n",
    "    \"\"\"\n",
    "    transformed = data*a + b\n",
    "    plt.hist(x=transformed, bins='auto', color='#521422', alpha=0.7, rwidth=0.85)\n",
    "    plt.grid(axis='y', alpha=0.75)\n",
    "    plt.xlabel('X')\n",
    "    plt.ylabel('Liczność')\n",
    "    describe(transformed)\n",
    "    # Keep the x-range fixed regardless of a and b.\n",
    "    plt.xlim(-13, 13)\n",
    "\n",
    "\n",
    "def normal_dist():\n",
    "    \"\"\"Plot the standard normal density N(0, 1) on the interval [-3, 3].\"\"\"\n",
    "    x = np.linspace(-3, 3, 100)\n",
    "    plt.plot(x, stats.norm.pdf(x, 0, 1))\n",
    "    plt.grid()\n",
    "\n",
    "\n",
    "def _sample_mode(data):\n",
    "    \"\"\"Return the most frequent value of ``data`` (the smallest one on ties).\"\"\"\n",
    "    # np.unique returns sorted values, so argmax picks the smallest tied value.\n",
    "    values, counts = np.unique(data, return_counts=True)\n",
    "    return values[np.argmax(counts)]\n",
    "\n",
    "\n",
    "def _bin_index(edges, value):\n",
    "    \"\"\"Return the index of the histogram bin, described by ``edges``, holding ``value``.\"\"\"\n",
    "    # Bins are half-open [left, right) except the last one, which also contains\n",
    "    # its right edge; clipping maps that edge (and any stray value) to a valid bin.\n",
    "    idx = np.searchsorted(edges, value, side='right') - 1\n",
    "    return int(np.clip(idx, 0, len(edges) - 2))\n",
    "\n",
    "\n",
    "def skewness(a=0):\n",
    "    \"\"\"Draw a histogram of a skew-normal sample and highlight mean, median and mode.\n",
    "\n",
    "    1000 values are drawn from ``stats.skewnorm(a)``, multiplied by 10 and\n",
    "    truncated to integers. The bars containing the mean, the median and the\n",
    "    mode are recoloured red, green and blue respectively; when several\n",
    "    statistics fall into the same bar the later colour wins. The sample\n",
    "    skewness is shown in the figure title and the three statistics in the legend.\n",
    "\n",
    "    Args:\n",
    "        a: shape parameter passed to ``stats.skewnorm``.\n",
    "    \"\"\"\n",
    "    data = (stats.skewnorm(a).rvs(1000)*10).astype(int)\n",
    "\n",
    "    _, edges, patches = plt.hist(x=data, bins='auto', color='#521422', alpha=0.7, rwidth=0.85)\n",
    "    plt.grid(axis='y', alpha=0.75)\n",
    "    plt.xlabel('X')\n",
    "    plt.ylabel('Liczność')\n",
    "\n",
    "    # The mode is computed with NumPy: stats.mode(data)[0][0] raises IndexError on\n",
    "    # SciPy versions whose mode() returns a scalar for 1-D input.\n",
    "    stat = [np.mean(data), np.median(data), _sample_mode(data)]\n",
    "    cols = ['r', 'g', 'b']\n",
    "    lines = [Line2D([0], [0], color=c, lw=4) for c in cols]\n",
    "\n",
    "    for col, st in zip(cols, stat):\n",
    "        # Use the real bin edges: with rwidth < 1 a bar's x is shifted to the right\n",
    "        # of its bin's left edge, so comparing against patch.get_x() can pick the\n",
    "        # neighbouring bar.\n",
    "        patches[_bin_index(edges, st)].set_color(col)\n",
    "\n",
    "    plt.suptitle(\"Skośność=\"+str(round(stats.skew(data),3)))\n",
    "    plt.legend(lines, [txt + '=' + str(st) for txt, st in zip(['Średnia', 'Mediana', 'Dominanta'], stat)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Statystyka opisowa"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"![histogramy](https://www.cs.put.poznan.pl/amensfelt/pub/pics/hists.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![weurl](https://assets.weforum.org/editor/F9EqAhZ_XqicwLxiJpPB4sLnnlbbAtrlnnnGWtARM1w.gif \"spread\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](https://www.weforum.org/agenda/2020/03/coronavirus-control-measures)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Przykład - wyniki maratonu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![histogramy](https://www.cs.put.poznan.pl/amensfelt/pub/pics/marathon_total.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](https://enduhub.com/pl/wyniki/2017/10/15/bieganie/18-pko-poznan-maraton,33686/)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Przykład - wyniki maratonu 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![histogramy](https://www.cs.put.poznan.pl/amensfelt/pub/pics/marathon_km.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Miary rozkładu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item Miary położenia\n", "\t\\item Miary rozproszenia\n", "\t\\item Miary asymetrii i koncentracji \n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dominanta" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dominantą nazywamy najczęściej występującą wartość w próbce." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mediana" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$n$ - liczba obserwacji\n", "\\vspace{0.5cm}\n", "\\begin{itemize}\n", "\t\\item $n$ nieparzyste\n", "\t$$\\textrm{Mediana} = x_{(n+1)/2}$$\n", "\t\\vspace{0.25cm}\n", "\t\\item $n$ parzyste\n", "\t$$\\textrm{Mediana} = \\frac{x_{n/2}+x_{n/2+1}}{2}$$\n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inne kwantyle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item Percentyle\n", "\t\\item Decyle\n", "\t\\item Kwartyle\n", "\t\\item ...\n", "\\end{itemize}\n", "$$Poz_p = (n+1)p$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![siatka](http://bi.gazeta.pl/im/8/1326/m1326948.jpg)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](http://bi.gazeta.pl/im/8/1326/m1326948.jpg)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Średnia arytmetyczna" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item w populacji:\n", "\t$$\\mu = \\frac{1}{n} \\sum_{i=1}^{n}x_i$$\n", "\n", " \\item w próbce:\n", "\t$$\\bar{x} = \\frac{1}{n} \\sum_{i=1}^{n}x_i$$\n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![średnia](http://www.texample.net/media/tikz/examples/PNG/balance.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](http://www.texample.net/media/tikz/examples/PNG/balance.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inne średnie" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item Średnia geometryczna\n", "\t\t$$\\bar{x}_g = {\\displaystyle \\left(\\prod _{i=1}^{n}x_{i}\\right)^{\\frac {1}{n}}={\\sqrt[{n}]{x_{1}x_{2}\\cdots x_{n}}}}$$\n", " \n", "\\item Średnia harmoniczna\n", "\t\t\t$$\\bar{x}_h=\\frac{n}{\\sum\\limits_{i=1}^{n}\\frac{1}{x_i}} ~~~~~~~~~~~~~ 
\\bar{x}_{wh}=\\frac{\\sum\\limits_{i=1}^{n}w_i}{\\sum\\limits_{i=1}^{n}\\frac{w_i}{x_i}}$$\n", " \n", "\\item Średnia ucinana\n", "\t\t\t$$\\bar{x}_{t} = \\frac{1}{n-2k}\\sum_{i=k+1}^{n-k}x_i$$\n", "\t\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Przykład - wynagrodzenia" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![wynagrodzenia](https://www.cs.put.poznan.pl/amensfelt/pub/pics/salary.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Źródło](https://stat.gov.pl)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Skale pomiarowe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "|\t\t | Dominanta | Mediana | Średnia |\n", "|---|---|---|---|\n", "|Nominalne | | | |\n", "|Porządkowe | | | |\n", "|Interwałowe/Ilorazowe | | | |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Miary rozproszenia" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item ? 
\n", "\t$$\\frac{1}{n}\\sum_{i=1}^{n}(x_i-\\bar{x})$$\n", " \n", "\\item Odchylenie przeciętne\n", "\t$$D = \\frac{1}{n}\\sum_{i=1}^{n}|(x_i-\\bar{x})|$$\n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Wariancja i odchylenie standardowe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item Wariancja w populacji\n", "\t$$\\sigma^2=\\frac{1}{n}\\sum_{i=1}^{n}(x_i-\\mu)^2$$\n", " \n", "\\item Wariancja w próbce\n", "\t$$s^2=\\frac{1}{\\boldsymbol{n}-\\boldsymbol{1}}\\sum_{i=1}^{n}(x_i-\\bar{x})^2$$\n", " \n", "\\item Odchylenie standardowe\n", "\t$$s=\\sqrt{s^2}$$\n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inne miary rozproszenia" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\t\\item Rozstęp \n", "\t\t\t$$R = x_{max}-x_{min}$$\n", " \n", "\\item Rozstęp międzykwartylowy \n", "\t\t\t$$IQR = Q_3-Q_1$$\n", " \n", "\\item Współczynnik zmienności \n", "\t\t\t$$V = \\frac{s}{\\bar{x}}$$\n", "\\end{itemize}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0\n", "count 1000.000000\n", "mean 1.033392\n", "std 0.998886\n", "min -2.928110\n", "25% 0.358840\n", "50% 1.029475\n", "75% 1.699662\n", "max 3.705007\n" ] } ], "source": [ "data = np.random.normal(loc=1, scale=1, size=1000) \n", "describe(data)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "79b1e93c05604069901406c065528daf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=1, description='a', max=3, min=-3), IntSlider(value=0, description='b', …" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
"interact(histogram, data=fixed(data), a=(-3,3,1), b=(-10,10,1))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Wykres pudełkowy (boxplot)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![boxplot](https://www.cs.put.poznan.pl/amensfelt/pub/pics/boxplot2.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Miary asymetrii i koncentracji" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Moment centralny rzędu \\textit{k}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$M_k = \\frac{1}{n}\\sum_{i=1}^{n}(x_i-\\bar{x})^k$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Współczynnik asymetrii (skośności)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$A = \\frac{M_3}{s^3}$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Współczynnik wyostrzenia" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{itemize}\n", "\t\\item Współczynnik koncentracji (kurtoza)\n", "\t$$K=\\frac{M_4}{s^4}$$\n", " \n", "\\item Współczynnik wyostrzenia\n", "\t$$E=K-3$$\n", "\\end{itemize}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "normal_dist()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": false }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2434cdd48ee94b218f3aeb387523f9f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=0, description='a', max=20, min=-20, step=2), Output()), _dom_classes=('…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "interact(skewness, a=(-20,20,2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Miary rozkładu dla szeregu rozdzielczego" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Średnia i wariancja dla szeregu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$n_i$ - liczność $i$-tego przedziału\n", "\n", "$\\dot{x_i}$ - środek $i$-tego przedziału\n", "\n", "\\begin{itemize}\n", "\t\t\\item Średnia:\n", "\t\t\t$$\\bar{x_S} \\approx \\frac{\\sum\\limits_{i=1}^kn_i*\\dot{x_i}}{n}$$\n", " \n", "\\item Wariancja:\n", "\t\t\t$$s^2_S \\approx \\frac{\\sum\\limits_{i=1}^kn_i*(\\dot{x}-\\bar{x})^2}{n-1}$$\n", "\\end{itemize}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dominanta - szereg rozdzielczy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$x_{modS} \\approx x_0 + \\frac{n_0 - n_{-1}}{(n_0 - n_{-1})+(n_0 - n_{+1})}*h_0$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![dominanta](https://www.cs.put.poznan.pl/amensfelt/pub/pics/dominanta.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mediana - szereg rozdzielczy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$x_{medS} \\approx x_0 + \\frac{h_0}{n_0}*(\\frac{n}{2}-F_{-1})$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"![mediana](https://www.cs.put.poznan.pl/amensfelt/pub/pics/mediana.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Skośność" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$A=\\frac{\\bar{x}-D}{s}$$" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false } }, "nbformat": 4, "nbformat_minor": 4 }