{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Przykład losowego podziału danych na zbiór uczący oraz testujący:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# train_test_split is imported from sklearn.model_selection; the legacy\n",
    "# sklearn.cross_validation module is not available in current scikit-learn.\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# 30 consecutive integers arranged as 10 rows x 3 columns, one label per row.\n",
    "data = np.arange(30).reshape(-1,3)\n",
    "labels = np.arange(10)\n",
    "# test_size=0.25 holds out a quarter of the rows; the fixed random_state\n",
    "# makes the random split repeatable between runs.\n",
    "data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.25, random_state=1234)\n",
    "print(data)\n",
    "print(labels)\n",
    "print(data_train)\n",
    "print(data_test)\n",
    "print(labels_train)\n",
    "print(labels_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Przykład zastosowania k-krotnej walidacji:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "# cross_val_score is imported from sklearn.model_selection (the legacy\n",
    "# sklearn.cross_validation module is not available in current scikit-learn).\n",
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "knn = KNeighborsClassifier()\n",
    "# cv=5 requests 5-fold cross-validation; the result holds one score per fold.\n",
    "scores = cross_val_score(knn, iris.data, iris.target, cv=5)\n",
    "print(scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Badanie wpływu parametru klasyfikatora na wyniki:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Relies on KNeighborsClassifier, cross_val_score and iris from the cell above.\n",
    "scores = []\n",
    "n = []\n",
    "for i in range(1,30):\n",
    "    # Evaluate a fresh classifier for every setting instead of mutating the\n",
    "    # shared `knn` object, so re-running this cell leaves `knn` untouched.\n",
    "    candidate = KNeighborsClassifier(n_neighbors=i)\n",
    "    n.append(i)\n",
    "    # Mean of the five per-fold scores for this value of n_neighbors.\n",
    "    scores.append(cross_val_score(candidate, iris.data, iris.target, cv=5).mean())\n",
    "    print(n[-1], scores[-1])\n",
    "plt.plot(n, scores)\n",
    "plt.xlabel('Liczba sąsiadów (n_neighbors)')\n",
    "# The trailing semicolon suppresses the textual repr of the returned object.\n",
    "plt.ylabel('Średni wynik walidacji krzyżowej (cv=5)');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Szukanie najlepszego zestawu parametrów z użyciem k-krotnej walidacji:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# `svm` is not used in this cell; the import is kept so the cell still binds\n",
    "# the same name as before.\n",
    "from sklearn import svm\n",
    "# GridSearchCV is imported from sklearn.model_selection; the legacy\n",
    "# sklearn.grid_search module is not available in current scikit-learn.\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Candidate values for the classifier's n_neighbors and p parameters;\n",
    "# every combination is scored with 5-fold cross-validation.\n",
    "parameters = {'n_neighbors': list(range(1, 30)), 'p':[1,2,3,4] }\n",
    "clf = GridSearchCV(knn, parameters, cv=5)\n",
    "clf.fit(iris.data, iris.target)\n",
    "print(clf.best_estimator_)\n",
    "print(clf.best_params_)\n",
    "print(clf.best_score_)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}