{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Supervised Learning with scikit-learn" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "plt.style.use('ggplot')\n", "\n", "import sklearn\n", "from sklearn import datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### KNN (binary)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 讀資料集" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
partyinfantswaterbudgetphysiciansalvadorreligioussatelliteaidmissileimmigrationsynfuelseducationsuperfundcrimeduty_free_exportseaa_rsa
4democrat0110110000001111
7republican0101110000011101
18democrat1110001110100011
22democrat1110001110000011
24democrat1010001111000011
......................................................
422democrat0110011110100111
425democrat1010001111000011
426republican0001111101011101
429republican0011110011011101
430democrat0010001111000001
\n", "

232 rows × 17 columns

\n", "
" ], "text/plain": [ " party infants water budget physician salvador religious \\\n", "4 democrat 0 1 1 0 1 1 \n", "7 republican 0 1 0 1 1 1 \n", "18 democrat 1 1 1 0 0 0 \n", "22 democrat 1 1 1 0 0 0 \n", "24 democrat 1 0 1 0 0 0 \n", ".. ... ... ... ... ... ... ... \n", "422 democrat 0 1 1 0 0 1 \n", "425 democrat 1 0 1 0 0 0 \n", "426 republican 0 0 0 1 1 1 \n", "429 republican 0 0 1 1 1 1 \n", "430 democrat 0 0 1 0 0 0 \n", "\n", " satellite aid missile immigration synfuels education superfund \\\n", "4 0 0 0 0 0 0 1 \n", "7 0 0 0 0 0 1 1 \n", "18 1 1 1 0 1 0 0 \n", "22 1 1 1 0 0 0 0 \n", "24 1 1 1 1 0 0 0 \n", ".. ... ... ... ... ... ... ... \n", "422 1 1 1 0 1 0 0 \n", "425 1 1 1 1 0 0 0 \n", "426 1 1 0 1 0 1 1 \n", "429 0 0 1 1 0 1 1 \n", "430 1 1 1 1 0 0 0 \n", "\n", " crime duty_free_exports eaa_rsa \n", "4 1 1 1 \n", "7 1 0 1 \n", "18 0 1 1 \n", "22 0 1 1 \n", "24 0 1 1 \n", ".. ... ... ... \n", "422 1 1 1 \n", "425 0 1 1 \n", "426 1 0 1 \n", "429 1 0 1 \n", "430 0 0 1 \n", "\n", "[232 rows x 17 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vote_raw = pd.read_csv(\"data/house-votes-84.csv\")\n", "\n", "vote = vote_raw.copy()\n", "col_names = ['party', 'infants', 'water', 'budget', 'physician', 'salvador',\n", " 'religious', 'satellite', 'aid', 'missile', 'immigration', 'synfuels',\n", " 'education', 'superfund', 'crime', 'duty_free_exports', 'eaa_rsa']\n", "vote.columns = col_names\n", "vote[vote == \"?\"] = np.nan # 把 ? 
改成 na\n", "vote = vote.dropna()\n", "for i in col_names[1:]:\n", " vote[i] = vote[i].replace({\"y\": 1, \"n\": 0})\n", "vote" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 232 entries, 4 to 430\n", "Data columns (total 17 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 party 232 non-null object\n", " 1 infants 232 non-null int64 \n", " 2 water 232 non-null int64 \n", " 3 budget 232 non-null int64 \n", " 4 physician 232 non-null int64 \n", " 5 salvador 232 non-null int64 \n", " 6 religious 232 non-null int64 \n", " 7 satellite 232 non-null int64 \n", " 8 aid 232 non-null int64 \n", " 9 missile 232 non-null int64 \n", " 10 immigration 232 non-null int64 \n", " 11 synfuels 232 non-null int64 \n", " 12 education 232 non-null int64 \n", " 13 superfund 232 non-null int64 \n", " 14 crime 232 non-null int64 \n", " 15 duty_free_exports 232 non-null int64 \n", " 16 eaa_rsa 232 non-null int64 \n", "dtypes: int64(16), object(1)\n", "memory usage: 32.6+ KB\n" ] } ], "source": [ "vote.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 這筆資料共 232 個列,每一列是一個立委 \n", "* y 是 party(該立委所屬的政黨,民主黨或共和黨) \n", "* 剩下的全都是x,這些x都是各大議題的投票結果。以 `infants` 這個變數來說,就是在嬰兒這個議題上,此立委是投贊成票(1)還是反對票(0). \n", "* 那這筆資料的任務,就是根據這些議題的投票結果,來猜這個立委屬於哪個政黨" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 最簡單流程" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 先來講最簡單的流程 \n", " * 分 train/test \n", " * ~~定 pipeline.~~ \n", " * ~~定義 preprocessing steps.~~ \n", " * 定義 classifier. \n", " * ~~hyper-parameter tuning~~ \n", " * ~~grid search~~\n", " * ~~random search~~\n", " * 用整個 training set 做 fitting. \n", " * 對 testing set 做 predict. \n", " * 評估模型表現 \n", " * threshold. \n", " * non-threshold\n", " * 細節資訊探索(e.g.
fitting後的參數,...)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 分 train/test" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# 分 train/test\n", "X = vote.drop(\"party\", axis = 1)\n", "y = vote[\"party\"]\n", "\n", "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, \n", " y, \n", " test_size = 0.3, \n", " random_state = 21, \n", " stratify = y\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 可以看看,X_train 和 X_test 的資料筆數分配" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The shape of X_train is: (162, 16)\n", "The shape of X_test is: (70, 16)\n" ] } ], "source": [ "print(f\"The shape of X_train is: {X_train.shape}\")\n", "print(f\"The shape of X_test is: {X_test.shape}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 可以看, y_train 和 y_test 的分佈是不是一樣(因為我有做 stratify)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "republican% in whole data set is: 0.46551724137931033\n", "republican% in training set is: 0.46296296296296297\n", "republican% in testing set is: 0.4714285714285714\n" ] } ], "source": [ "print(f\"republican% in whole data set is: {(y == 'republican').sum()/y.size}\")\n", "print(f\"republican% in training set is: {(y_train == 'republican').sum()/y_train.size}\")\n", "print(f\"republican% in testing set is: {(y_test == 'republican').sum()/y_test.size}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 做 pipeline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* pipeline 包括 preprocessing + model,那這邊只做 model" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# 定 pipeline\n", "## 定 preprocessing steps. 
(略)\n", "## 定 classifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "knn = KNeighborsClassifier(n_neighbors = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 很簡單的就定義完我要使用的 model (knn,以及 neighbor 數選了 5). \n", "* 我們可以看他的文件,來看這個 classifier 的細節" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\u001b[0;31mInit signature:\u001b[0m\n", "\u001b[0mKNeighborsClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mn_neighbors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'uniform'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0malgorithm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'auto'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mleaf_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'minkowski'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mmetric_params\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m \n", "Classifier implementing the k-nearest neighbors vote.\n", "\n", "Read more in the :ref:`User 
Guide `.\n", "\n", "Parameters\n", "----------\n", "n_neighbors : int, default=5\n", " Number of neighbors to use by default for :meth:`kneighbors` queries.\n", "\n", "weights : {'uniform', 'distance'} or callable, default='uniform'\n", " Weight function used in prediction. Possible values:\n", "\n", " - 'uniform' : uniform weights. All points in each neighborhood\n", " are weighted equally.\n", " - 'distance' : weight points by the inverse of their distance.\n", " in this case, closer neighbors of a query point will have a\n", " greater influence than neighbors which are further away.\n", " - [callable] : a user-defined function which accepts an\n", " array of distances, and returns an array of the same shape\n", " containing the weights.\n", "\n", "algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n", " Algorithm used to compute the nearest neighbors:\n", "\n", " - 'ball_tree' will use :class:`BallTree`\n", " - 'kd_tree' will use :class:`KDTree`\n", " - 'brute' will use a brute-force search.\n", " - 'auto' will attempt to decide the most appropriate algorithm\n", " based on the values passed to :meth:`fit` method.\n", "\n", " Note: fitting on sparse input will override the setting of\n", " this parameter, using brute force.\n", "\n", "leaf_size : int, default=30\n", " Leaf size passed to BallTree or KDTree. This can affect the\n", " speed of the construction and query, as well as the memory\n", " required to store the tree. The optimal value depends on the\n", " nature of the problem.\n", "\n", "p : int, default=2\n", " Power parameter for the Minkowski metric. When p = 1, this is\n", " equivalent to using manhattan_distance (l1), and euclidean_distance\n", " (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", "\n", "metric : str or callable, default='minkowski'\n", " The distance metric to use for the tree. The default metric is\n", " minkowski, and with p=2 is equivalent to the standard Euclidean\n", " metric. 
For a list of available metrics, see the documentation of\n", " :class:`~sklearn.metrics.DistanceMetric`.\n", " If metric is \"precomputed\", X is assumed to be a distance matrix and\n", " must be square during fit. X may be a :term:`sparse graph`,\n", " in which case only \"nonzero\" elements may be considered neighbors.\n", "\n", "metric_params : dict, default=None\n", " Additional keyword arguments for the metric function.\n", "\n", "n_jobs : int, default=None\n", " The number of parallel jobs to run for neighbors search.\n", " ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n", " ``-1`` means using all processors. See :term:`Glossary `\n", " for more details.\n", " Doesn't affect :meth:`fit` method.\n", "\n", "Attributes\n", "----------\n", "classes_ : array of shape (n_classes,)\n", " Class labels known to the classifier\n", "\n", "effective_metric_ : str or callble\n", " The distance metric used. It will be same as the `metric` parameter\n", " or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n", " 'minkowski' and `p` parameter set to 2.\n", "\n", "effective_metric_params_ : dict\n", " Additional keyword arguments for the metric function. For most metrics\n", " will be same with `metric_params` parameter, but may also contain the\n", " `p` parameter value if the `effective_metric_` attribute is set to\n", " 'minkowski'.\n", "\n", "n_features_in_ : int\n", " Number of features seen during :term:`fit`.\n", "\n", " .. versionadded:: 0.24\n", "\n", "feature_names_in_ : ndarray of shape (`n_features_in_`,)\n", " Names of features seen during :term:`fit`. Defined only when `X`\n", " has feature names that are all strings.\n", "\n", " .. 
versionadded:: 1.0\n", "\n", "n_samples_fit_ : int\n", " Number of samples in the fitted data.\n", "\n", "outputs_2d_ : bool\n", " False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n", " otherwise True.\n", "\n", "See Also\n", "--------\n", "RadiusNeighborsClassifier: Classifier based on neighbors within a fixed radius.\n", "KNeighborsRegressor: Regression based on k-nearest neighbors.\n", "RadiusNeighborsRegressor: Regression based on neighbors within a fixed radius.\n", "NearestNeighbors: Unsupervised learner for implementing neighbor searches.\n", "\n", "Notes\n", "-----\n", "See :ref:`Nearest Neighbors ` in the online documentation\n", "for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", "\n", ".. warning::\n", "\n", " Regarding the Nearest Neighbors algorithms, if it is found that two\n", " neighbors, neighbor `k+1` and `k`, have identical distances\n", " but different labels, the results will depend on the ordering of the\n", " training data.\n", "\n", "https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", "\n", "Examples\n", "--------\n", ">>> X = [[0], [1], [2], [3]]\n", ">>> y = [0, 0, 1, 1]\n", ">>> from sklearn.neighbors import KNeighborsClassifier\n", ">>> neigh = KNeighborsClassifier(n_neighbors=3)\n", ">>> neigh.fit(X, y)\n", "KNeighborsClassifier(...)\n", ">>> print(neigh.predict([[1.1]]))\n", "[0]\n", ">>> print(neigh.predict_proba([[0.9]]))\n", "[[0.666... 0.333...]]\n", "\u001b[0;31mFile:\u001b[0m /Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/neighbors/_classification.py\n", "\u001b[0;31mType:\u001b[0m ABCMeta\n", "\u001b[0;31mSubclasses:\u001b[0m \n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "KNeighborsClassifier?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 用整個 training set 做 fitting." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier()" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.fit(X_train, y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* fitting 完後,可以簡要看一下他學到了啥" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'euclidean'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.effective_metric_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 對 testing set 做 predict" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "y_pred = knn.predict(X_test)\n", "y_pred_prob = knn.predict_proba(X_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 我們看看預測結果(label)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pred_label
0democrat
1democrat
2republican
3democrat
4republican
\n", "
" ], "text/plain": [ " pred_label\n", "0 democrat\n", "1 democrat\n", "2 republican\n", "3 democrat\n", "4 republican" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(y_pred, columns= [\"pred_label\"]).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 這個預測值,就是去比預測此委員為 democrat 的機率,和 republican 的機率,哪個大,而做出的判斷. (因為只有兩類,所以其實就是 threshold = 0.5 -> 如果 republican 的機率值 > 0.5,就判定為 republican (y的positive是republican)\n", "* 所以我們來看一下預測機率值" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
democratrepublican
01.00.0
11.00.0
20.01.0
31.00.0
40.01.0
\n", "
" ], "text/plain": [ " democrat republican\n", "0 1.0 0.0\n", "1 1.0 0.0\n", "2 0.0 1.0\n", "3 1.0 0.0\n", "4 0.0 1.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# predict_proba orders its columns like knn.classes_, so take the labels from the fitted model\n", "pd.DataFrame(y_pred_prob, columns=knn.classes_).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 這個機率的算法,就是找最近的 k 個 neighbor後,去統計 democrat 的比例,和 republican 的比例. \n", "* 所以以 index = 0 這一列來說,就是離此委員最近的 k 個 neighbor,全都是 democrat。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 效果評估" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[32 5]\n", " [ 2 31]]\n", " precision recall f1-score support\n", "\n", " democrat 0.94 0.86 0.90 37\n", " republican 0.86 0.94 0.90 33\n", "\n", " accuracy 0.90 70\n", " macro avg 0.90 0.90 0.90 70\n", "weighted avg 0.90 0.90 0.90 70\n", "\n" ] } ], "source": [ "from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score\n", "# evaluate the predictions\n", "print(confusion_matrix(y_test, y_pred))\n", "print(classification_report(y_test, y_pred))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 可以看到,如果你把 `republican` 當 positive 的話,就看第二列 \n", " * precision: 0.86,表示你預測他是republican的人中,有86%真的是republican. \n", " * recall: 0.94,表示實際上是republican的人中,有94%被你抓到. \n", " * f1-score: 是precision和recall的調和平均數. \n", "* 接著看 index = `accuracy` 那一列,可以看到,準確率是 0.90. " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0.4, 0.6],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1.
, 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0.8, 0.2],\n", " [1. , 0. ],\n", " [0.4, 0.6],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0.8, 0.2],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [0.2, 0.8],\n", " [0.8, 0.2],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0.4, 0.6],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ],\n", " [0. , 1. ],\n", " [1. , 0. ],\n", " [0. , 1. ]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_pred_prob" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "auc: 0.9492219492219492\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# predict_proba columns follow knn.classes_, so look up the positive class instead of hard-coding column 1\n", "pos_col = list(knn.classes_).index(\"republican\")\n", "y_test_trans = (y_test==\"republican\") # republican = 1, democrat = 0\n", "y_pred_prob_trans = y_pred_prob[:, pos_col] # predicted probability of republican\n", "\n", "auc = roc_auc_score(y_test_trans, y_pred_prob_trans)\n", "\n", "print(f\"auc: {auc}\")\n", "\n", "fpr, tpr, thresholds = roc_curve(y_test_trans, y_pred_prob_trans)\n", "plt.plot([0,1],[0, 1], \"k--\")\n", "plt.plot(fpr, tpr, label = \"KNN (K = 5)\")\n", "plt.xlabel(\"False Positive Rate\")\n", "plt.ylabel(\"True Positive Rate\")\n", "plt.title(\"KNN ROC Curve\")\n", "plt.legend() # draw the legend so the curve label set above is actually shown\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 最完整流程整理" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 來講最完整的流程 \n", " * 分 train/test \n", " * 定 pipeline \n", " * 定義 preprocessing steps \n", " * 定義 classifier. \n", " * hyper-parameter tuning \n", " * grid search \n", " * random search \n", " * 用整個 training set 做 fitting. \n", " * 對 testing set 做 predict. \n", " * 評估模型表現 \n", " * threshold. \n", " * non-threshold\n", " * 細節資訊探索(e.g.
fitting後的參數,...)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 分 train/test" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# 分 train/test\n", "X = vote.drop(\"party\", axis = 1)\n", "y = vote[\"party\"]\n", "\n", "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, \n", " y, \n", " test_size = 0.3, \n", " random_state = 21, \n", " stratify = y\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 做 pipeline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 對 knn 這種依賴 euclidean distance 的演算法,必須先做 normalization,再開始算距離,所以 pipeline寫成這樣:" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.neighbors import KNeighborsClassifier\n", "\n", "# preprocess\n", "scaler = StandardScaler()\n", "\n", "# model\n", "knn = KNeighborsClassifier()\n", "\n", "# pipeline\n", "my_pipe = Pipeline([\n", " (\"scaler\", scaler),\n", " (\"knn\", knn)\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 做 hyper-parameter tuning" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 先來做 grid_search" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "parameters = {\"knn__n_neighbors\": np.arange(1, 50)}\n", "grid_cv = GridSearchCV(my_pipe, \n", " param_grid = parameters, \n", " cv = 5,\n", " scoring='roc_auc');\n", "grid_cv.fit(X_train, y_train);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 這邊看一下,第二列的 parameters 裡面,\"knn__\"的knn,是用我的 my_pipe 物件裡的名稱 (\"knn\"); \"n_neighbors\" 是超參數的名稱\n", "* 做完 fitting 後的物件就是 grid_cv 了,我們可以看最佳參數是多少:" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": {
"text/plain": [ "{'knn__n_neighbors': 17}" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_cv.best_params_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 可以知道,最佳參數是 17. " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Object `ref:scoring_parameter` not found.\n" ] } ], "source": [ "?ref:scoring_parameter" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\u001b[0;31mInit signature:\u001b[0m\n", "\u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mparam_grid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mrefit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0mpre_dispatch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'2*n_jobs'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m \u001b[0merror_score\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnan\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 
"\u001b[0;34m\u001b[0m \u001b[0mreturn_train_score\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m \n", "Exhaustive search over specified parameter values for an estimator.\n", "\n", "Important members are fit, predict.\n", "\n", "GridSearchCV implements a \"fit\" and a \"score\" method.\n", "It also implements \"score_samples\", \"predict\", \"predict_proba\",\n", "\"decision_function\", \"transform\" and \"inverse_transform\" if they are\n", "implemented in the estimator used.\n", "\n", "The parameters of the estimator used to apply these methods are optimized\n", "by cross-validated grid-search over a parameter grid.\n", "\n", "Read more in the :ref:`User Guide `.\n", "\n", "Parameters\n", "----------\n", "estimator : estimator object\n", " This is assumed to implement the scikit-learn estimator interface.\n", " Either estimator needs to provide a ``score`` function,\n", " or ``scoring`` must be passed.\n", "\n", "param_grid : dict or list of dictionaries\n", " Dictionary with parameters names (`str`) as keys and lists of\n", " parameter settings to try as values, or a list of such\n", " dictionaries, in which case the grids spanned by each dictionary\n", " in the list are explored. 
This enables searching over any sequence\n", " of parameter settings.\n", "\n", "scoring : str, callable, list, tuple or dict, default=None\n", " Strategy to evaluate the performance of the cross-validated model on\n", " the test set.\n", "\n", " If `scoring` represents a single score, one can use:\n", "\n", " - a single string (see :ref:`scoring_parameter`);\n", " - a callable (see :ref:`scoring`) that returns a single value.\n", "\n", " If `scoring` represents multiple scores, one can use:\n", "\n", " - a list or tuple of unique strings;\n", " - a callable returning a dictionary where the keys are the metric\n", " names and the values are the metric scores;\n", " - a dictionary with metric names as keys and callables a values.\n", "\n", " See :ref:`multimetric_grid_search` for an example.\n", "\n", "n_jobs : int, default=None\n", " Number of jobs to run in parallel.\n", " ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n", " ``-1`` means using all processors. See :term:`Glossary `\n", " for more details.\n", "\n", " .. versionchanged:: v0.20\n", " `n_jobs` default changed from 1 to None\n", "\n", "refit : bool, str, or callable, default=True\n", " Refit an estimator using the best found parameters on the whole\n", " dataset.\n", "\n", " For multiple metric evaluation, this needs to be a `str` denoting the\n", " scorer that would be used to find the best parameters for refitting\n", " the estimator at the end.\n", "\n", " Where there are considerations other than maximum score in\n", " choosing a best estimator, ``refit`` can be set to a function which\n", " returns the selected ``best_index_`` given ``cv_results_``. 
In that\n", " case, the ``best_estimator_`` and ``best_params_`` will be set\n", " according to the returned ``best_index_`` while the ``best_score_``\n", " attribute will not be available.\n", "\n", " The refitted estimator is made available at the ``best_estimator_``\n", " attribute and permits using ``predict`` directly on this\n", " ``GridSearchCV`` instance.\n", "\n", " Also for multiple metric evaluation, the attributes ``best_index_``,\n", " ``best_score_`` and ``best_params_`` will only be available if\n", " ``refit`` is set and all of them will be determined w.r.t this specific\n", " scorer.\n", "\n", " See ``scoring`` parameter to know more about multiple metric\n", " evaluation.\n", "\n", " .. versionchanged:: 0.20\n", " Support for callable added.\n", "\n", "cv : int, cross-validation generator or an iterable, default=None\n", " Determines the cross-validation splitting strategy.\n", " Possible inputs for cv are:\n", "\n", " - None, to use the default 5-fold cross validation,\n", " - integer, to specify the number of folds in a `(Stratified)KFold`,\n", " - :term:`CV splitter`,\n", " - An iterable yielding (train, test) splits as arrays of indices.\n", "\n", " For integer/None inputs, if the estimator is a classifier and ``y`` is\n", " either binary or multiclass, :class:`StratifiedKFold` is used. In all\n", " other cases, :class:`KFold` is used. These splitters are instantiated\n", " with `shuffle=False` so the splits will be the same across calls.\n", "\n", " Refer :ref:`User Guide ` for the various\n", " cross-validation strategies that can be used here.\n", "\n", " .. 
versionchanged:: 0.22\n", " ``cv`` default value if None changed from 3-fold to 5-fold.\n", "\n", "verbose : int\n", " Controls the verbosity: the higher, the more messages.\n", "\n", " - >1 : the computation time for each fold and parameter candidate is\n", " displayed;\n", " - >2 : the score is also displayed;\n", " - >3 : the fold and candidate parameter indexes are also displayed\n", " together with the starting time of the computation.\n", "\n", "pre_dispatch : int, or str, default='2*n_jobs'\n", " Controls the number of jobs that get dispatched during parallel\n", " execution. Reducing this number can be useful to avoid an\n", " explosion of memory consumption when more jobs get dispatched\n", " than CPUs can process. This parameter can be:\n", "\n", " - None, in which case all the jobs are immediately\n", " created and spawned. Use this for lightweight and\n", " fast-running jobs, to avoid delays due to on-demand\n", " spawning of the jobs\n", "\n", " - An int, giving the exact number of total jobs that are\n", " spawned\n", "\n", " - A str, giving an expression as a function of n_jobs,\n", " as in '2*n_jobs'\n", "\n", "error_score : 'raise' or numeric, default=np.nan\n", " Value to assign to the score if an error occurs in estimator fitting.\n", " If set to 'raise', the error is raised. If a numeric value is given,\n", " FitFailedWarning is raised. This parameter does not affect the refit\n", " step, which will always raise the error.\n", "\n", "return_train_score : bool, default=False\n", " If ``False``, the ``cv_results_`` attribute will not include training\n", " scores.\n", " Computing training scores is used to get insights on how different\n", " parameter settings impact the overfitting/underfitting trade-off.\n", " However computing the scores on the training set can be computationally\n", " expensive and is not strictly required to select the parameters that\n", " yield the best generalization performance.\n", "\n", " .. 
versionadded:: 0.19\n", "\n", " .. versionchanged:: 0.21\n", " Default value was changed from ``True`` to ``False``\n", "\n", "Attributes\n", "----------\n", "cv_results_ : dict of numpy (masked) ndarrays\n", " A dict with keys as column headers and values as columns, that can be\n", " imported into a pandas ``DataFrame``.\n", "\n", " For instance the below given table\n", "\n", " +------------+-----------+------------+-----------------+---+---------+\n", " |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|\n", " +============+===========+============+=================+===+=========+\n", " | 'poly' | -- | 2 | 0.80 |...| 2 |\n", " +------------+-----------+------------+-----------------+---+---------+\n", " | 'poly' | -- | 3 | 0.70 |...| 4 |\n", " +------------+-----------+------------+-----------------+---+---------+\n", " | 'rbf' | 0.1 | -- | 0.80 |...| 3 |\n", " +------------+-----------+------------+-----------------+---+---------+\n", " | 'rbf' | 0.2 | -- | 0.93 |...| 1 |\n", " +------------+-----------+------------+-----------------+---+---------+\n", "\n", " will be represented by a ``cv_results_`` dict of::\n", "\n", " {\n", " 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],\n", " mask = [False False False False]...)\n", " 'param_gamma': masked_array(data = [-- -- 0.1 0.2],\n", " mask = [ True True False False]...),\n", " 'param_degree': masked_array(data = [2.0 3.0 -- --],\n", " mask = [False False True True]...),\n", " 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],\n", " 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],\n", " 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],\n", " 'std_test_score' : [0.01, 0.10, 0.05, 0.08],\n", " 'rank_test_score' : [2, 4, 3, 1],\n", " 'split0_train_score' : [0.80, 0.92, 0.70, 0.93],\n", " 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],\n", " 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],\n", " 'std_train_score' : [0.01, 0.19, 0.00, 0.03],\n", " 'mean_fit_time' : [0.73, 0.63, 0.43, 
0.49],\n", " 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],\n", " 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],\n", " 'std_score_time' : [0.00, 0.00, 0.00, 0.01],\n", " 'params' : [{'kernel': 'poly', 'degree': 2}, ...],\n", " }\n", "\n", " NOTE\n", "\n", " The key ``'params'`` is used to store a list of parameter\n", " settings dicts for all the parameter candidates.\n", "\n", " The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n", " ``std_score_time`` are all in seconds.\n", "\n", " For multi-metric evaluation, the scores for all the scorers are\n", " available in the ``cv_results_`` dict at the keys ending with that\n", " scorer's name (``'_'``) instead of ``'_score'`` shown\n", " above. ('split0_test_precision', 'mean_train_precision' etc.)\n", "\n", "best_estimator_ : estimator\n", " Estimator that was chosen by the search, i.e. estimator\n", " which gave highest score (or smallest loss if specified)\n", " on the left out data. Not available if ``refit=False``.\n", "\n", " See ``refit`` parameter for more information on allowed values.\n", "\n", "best_score_ : float\n", " Mean cross-validated score of the best_estimator\n", "\n", " For multi-metric evaluation, this is present only if ``refit`` is\n", " specified.\n", "\n", " This attribute is not available if ``refit`` is a function.\n", "\n", "best_params_ : dict\n", " Parameter setting that gave the best results on the hold out data.\n", "\n", " For multi-metric evaluation, this is present only if ``refit`` is\n", " specified.\n", "\n", "best_index_ : int\n", " The index (of the ``cv_results_`` arrays) which corresponds to the best\n", " candidate parameter setting.\n", "\n", " The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n", " the parameter setting for the best model, that gives the highest\n", " mean score (``search.best_score_``).\n", "\n", " For multi-metric evaluation, this is present only if ``refit`` is\n", " specified.\n", "\n", "scorer_ : function or a dict\n", " 
Scorer function used on the held out data to choose the best\n", " parameters for the model.\n", "\n", " For multi-metric evaluation, this attribute holds the validated\n", " ``scoring`` dict which maps the scorer key to the scorer callable.\n", "\n", "n_splits_ : int\n", " The number of cross-validation splits (folds/iterations).\n", "\n", "refit_time_ : float\n", " Seconds used for refitting the best model on the whole dataset.\n", "\n", " This is present only if ``refit`` is not False.\n", "\n", " .. versionadded:: 0.20\n", "\n", "multimetric_ : bool\n", " Whether or not the scorers compute several metrics.\n", "\n", "classes_ : ndarray of shape (n_classes,)\n", " The classes labels. This is present only if ``refit`` is specified and\n", " the underlying estimator is a classifier.\n", "\n", "n_features_in_ : int\n", " Number of features seen during :term:`fit`. Only defined if\n", " `best_estimator_` is defined (see the documentation for the `refit`\n", " parameter for more details) and that `best_estimator_` exposes\n", " `n_features_in_` when fit.\n", "\n", " .. versionadded:: 0.24\n", "\n", "feature_names_in_ : ndarray of shape (`n_features_in_`,)\n", " Names of features seen during :term:`fit`. Only defined if\n", " `best_estimator_` is defined (see the documentation for the `refit`\n", " parameter for more details) and that `best_estimator_` exposes\n", " `feature_names_in_` when fit.\n", "\n", " .. versionadded:: 1.0\n", "\n", "Notes\n", "-----\n", "The parameters selected are those that maximize the score of the left out\n", "data, unless an explicit score is passed in which case it is used instead.\n", "\n", "If `n_jobs` was set to a value higher than one, the data is copied for each\n", "point in the grid (and not `n_jobs` times). This is done for efficiency\n", "reasons if individual jobs take very little time, but may raise errors if\n", "the dataset is large and not enough memory is available. A workaround in\n", "this case is to set `pre_dispatch`. 
Then, the memory is copied only\n", "`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\n", "n_jobs`.\n", "\n", "See Also\n", "---------\n", "ParameterGrid : Generates all the combinations of a hyperparameter grid.\n", "train_test_split : Utility function to split the data into a development\n", " set usable for fitting a GridSearchCV instance and an evaluation set\n", " for its final evaluation.\n", "sklearn.metrics.make_scorer : Make a scorer from a performance metric or\n", " loss function.\n", "\n", "Examples\n", "--------\n", ">>> from sklearn import svm, datasets\n", ">>> from sklearn.model_selection import GridSearchCV\n", ">>> iris = datasets.load_iris()\n", ">>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}\n", ">>> svc = svm.SVC()\n", ">>> clf = GridSearchCV(svc, parameters)\n", ">>> clf.fit(iris.data, iris.target)\n", "GridSearchCV(estimator=SVC(),\n", " param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})\n", ">>> sorted(clf.cv_results_.keys())\n", "['mean_fit_time', 'mean_score_time', 'mean_test_score',...\n", " 'param_C', 'param_kernel', 'params',...\n", " 'rank_test_score', 'split0_test_score',...\n", " 'split2_test_score', ...\n", " 'std_fit_time', 'std_score_time', 'std_test_score']\n", "\u001b[0;31mFile:\u001b[0m /Volumes/GoogleDrive/我的雲端硬碟/0. 
codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/model_selection/_search.py\n", "\u001b[0;31mType:\u001b[0m ABCMeta\n", "\u001b[0;31mSubclasses:\u001b[0m \n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "?GridSearchCV" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mean_fit_time': array([0.00410557, 0.00349936, 0.00289578, 0.00251188, 0.00247269,\n", " 0.00252709, 0.00272069, 0.00246129, 0.00306287, 0.00371184,\n", " 0.00294638, 0.00308042, 0.00299649, 0.00300522, 0.00273948,\n", " 0.00361261, 0.00310259, 0.00313864, 0.00295277, 0.00384784,\n", " 0.00304828, 0.00281844, 0.00319686, 0.00302219, 0.00305853,\n", " 0.00290966, 0.00289521, 0.00281544, 0.00308471, 0.00267277,\n", " 0.00285001, 0.0030632 , 0.00333405, 0.00302234, 0.00311818,\n", " 0.00305524, 0.00284081, 0.00307274, 0.00295916, 0.00310979,\n", " 0.00310645, 0.00305629, 0.00277839, 0.00244412, 0.00253983,\n", " 0.00268669, 0.00238819, 0.00241308, 0.00232444]),\n", " 'std_fit_time': array([7.88619106e-04, 9.42372814e-04, 5.09592826e-04, 2.05940145e-04,\n", " 1.30329147e-04, 1.39555875e-04, 4.71658495e-04, 9.38250994e-05,\n", " 7.01362537e-04, 9.40648512e-04, 5.46725004e-04, 6.70972481e-04,\n", " 6.49284812e-04, 6.08038812e-04, 4.14937119e-04, 5.44774122e-04,\n", " 4.02125639e-04, 7.85894654e-04, 4.22827063e-04, 3.05221040e-04,\n", " 4.93310674e-04, 4.91984687e-04, 6.07253808e-04, 3.68679192e-04,\n", " 7.83753444e-04, 3.84013546e-04, 5.97170735e-04, 3.48093033e-04,\n", " 5.59792474e-04, 1.83740436e-04, 5.59706703e-04, 6.29626715e-04,\n", " 6.91515668e-04, 5.43012236e-04, 4.23518716e-04, 4.46913336e-04,\n", " 2.66436123e-04, 5.09237350e-04, 2.87175098e-04, 5.41384761e-04,\n", " 3.48689486e-04, 4.61566522e-04, 4.62282743e-05, 1.11534301e-04,\n", " 2.85003914e-04, 3.43765774e-04, 4.60957275e-05, 6.26801311e-05,\n", " 2.83467783e-04]),\n", " 'mean_score_time': array([0.0031558 , 0.00250463, 
0.00250649, 0.00215402, 0.00216942,\n", " 0.00229611, 0.00228481, 0.00209074, 0.00282593, 0.00295038,\n", " 0.00259624, 0.00252881, 0.00267229, 0.00262327, 0.00244746,\n", " 0.00306764, 0.00275512, 0.00263891, 0.00269318, 0.00277519,\n", " 0.00260739, 0.00271072, 0.00273805, 0.0025115 , 0.00240846,\n", " 0.00260949, 0.00252876, 0.00271316, 0.00271668, 0.00248637,\n", " 0.00258036, 0.00288286, 0.0027564 , 0.00256944, 0.00278878,\n", " 0.00260425, 0.0026751 , 0.00264025, 0.00280237, 0.0027246 ,\n", " 0.00317144, 0.00274553, 0.00256987, 0.00226922, 0.00251756,\n", " 0.00245085, 0.0023438 , 0.00224037, 0.00238848]),\n", " 'std_score_time': array([1.11923762e-03, 5.02003516e-04, 6.28364240e-04, 2.53515694e-04,\n", " 1.18820261e-04, 2.69056802e-04, 2.76645180e-04, 7.58113199e-05,\n", " 6.51008842e-04, 6.12322828e-04, 5.13428608e-04, 4.03309776e-04,\n", " 4.90662646e-04, 4.82464803e-04, 2.78491622e-04, 2.98720787e-04,\n", " 3.83199448e-04, 4.57812113e-04, 3.96594125e-04, 4.14270047e-04,\n", " 4.59691211e-04, 4.23115014e-04, 3.26109745e-04, 2.89609198e-04,\n", " 2.27550939e-04, 4.39541240e-04, 3.07336423e-04, 6.50968867e-04,\n", " 3.49868736e-04, 1.77414479e-04, 2.67554022e-04, 4.83264888e-04,\n", " 4.17323549e-04, 2.95681527e-04, 2.24535524e-04, 3.38101453e-04,\n", " 2.95083632e-04, 3.19394449e-04, 4.91383988e-04, 3.39971364e-04,\n", " 5.14225423e-04, 4.11409220e-04, 9.38881830e-05, 3.99615144e-05,\n", " 2.96201578e-04, 2.11339188e-04, 8.54063938e-05, 6.91232000e-05,\n", " 3.91190910e-04]),\n", " 'param_knn__n_neighbors': masked_array(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,\n", " 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,\n", " 45, 46, 47, 48, 49],\n", " mask=[False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, 
False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False, False, False, False, False, False, False, False,\n", " False],\n", " fill_value='?',\n", " dtype=object),\n", " 'params': [{'knn__n_neighbors': 1},\n", " {'knn__n_neighbors': 2},\n", " {'knn__n_neighbors': 3},\n", " {'knn__n_neighbors': 4},\n", " {'knn__n_neighbors': 5},\n", " {'knn__n_neighbors': 6},\n", " {'knn__n_neighbors': 7},\n", " {'knn__n_neighbors': 8},\n", " {'knn__n_neighbors': 9},\n", " {'knn__n_neighbors': 10},\n", " {'knn__n_neighbors': 11},\n", " {'knn__n_neighbors': 12},\n", " {'knn__n_neighbors': 13},\n", " {'knn__n_neighbors': 14},\n", " {'knn__n_neighbors': 15},\n", " {'knn__n_neighbors': 16},\n", " {'knn__n_neighbors': 17},\n", " {'knn__n_neighbors': 18},\n", " {'knn__n_neighbors': 19},\n", " {'knn__n_neighbors': 20},\n", " {'knn__n_neighbors': 21},\n", " {'knn__n_neighbors': 22},\n", " {'knn__n_neighbors': 23},\n", " {'knn__n_neighbors': 24},\n", " {'knn__n_neighbors': 25},\n", " {'knn__n_neighbors': 26},\n", " {'knn__n_neighbors': 27},\n", " {'knn__n_neighbors': 28},\n", " {'knn__n_neighbors': 29},\n", " {'knn__n_neighbors': 30},\n", " {'knn__n_neighbors': 31},\n", " {'knn__n_neighbors': 32},\n", " {'knn__n_neighbors': 33},\n", " {'knn__n_neighbors': 34},\n", " {'knn__n_neighbors': 35},\n", " {'knn__n_neighbors': 36},\n", " {'knn__n_neighbors': 37},\n", " {'knn__n_neighbors': 38},\n", " {'knn__n_neighbors': 39},\n", " {'knn__n_neighbors': 40},\n", " {'knn__n_neighbors': 41},\n", " {'knn__n_neighbors': 42},\n", " {'knn__n_neighbors': 43},\n", " {'knn__n_neighbors': 44},\n", " {'knn__n_neighbors': 45},\n", " {'knn__n_neighbors': 46},\n", " {'knn__n_neighbors': 47},\n", " {'knn__n_neighbors': 48},\n", " {'knn__n_neighbors': 49}],\n", " 'split0_test_score': array([0.87222222, 0.91851852, 0.97222222, 0.96666667, 0.97407407,\n", " 0.97037037, 0.96481481, 0.97222222, 0.98148148, 0.98518519,\n", " 0.98148148, 0.97962963, 0.97777778, 0.97592593, 
0.97222222,\n", " 0.96851852, 0.97407407, 0.97037037, 0.96666667, 0.97037037,\n", " 0.97777778, 0.97777778, 0.97777778, 0.97777778, 0.97777778,\n", " 0.98148148, 0.97777778, 0.97777778, 0.97777778, 0.97777778,\n", " 0.97777778, 0.97777778, 0.97592593, 0.97037037, 0.96851852,\n", " 0.96851852, 0.97592593, 0.97592593, 0.97592593, 0.97592593,\n", " 0.97407407, 0.97407407, 0.97407407, 0.97407407, 0.97407407,\n", " 0.97407407, 0.97407407, 0.97407407, 0.97777778]),\n", " 'split1_test_score': array([0.97222222, 0.97222222, 0.97222222, 0.97222222, 0.97222222,\n", " 1. , 1. , 0.99814815, 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 0.99814815, 0.99814815, 0.99814815, 0.99814815, 0.99814815,\n", " 0.99814815, 0.99814815, 0.9962963 , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 0.99814815, 0.99814815, 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. ]),\n", " 'split2_test_score': array([0.93333333, 0.95882353, 0.95686275, 0.95686275, 0.95686275,\n", " 0.95294118, 0.95098039, 0.95098039, 0.94901961, 0.94705882,\n", " 0.95294118, 0.95294118, 0.98039216, 0.99019608, 0.99215686,\n", " 0.99019608, 0.98823529, 0.98823529, 0.98823529, 0.98627451,\n", " 0.98627451, 0.98431373, 0.98039216, 0.97647059, 0.9745098 ,\n", " 0.9745098 , 0.97254902, 0.97647059, 0.97647059, 0.98235294,\n", " 0.97843137, 0.97843137, 0.97843137, 0.97647059, 0.97647059,\n", " 0.9745098 , 0.9745098 , 0.97058824, 0.97058824, 0.96862745,\n", " 0.96666667, 0.96666667, 0.9627451 , 0.96078431, 0.96078431,\n", " 0.9627451 , 0.9627451 , 0.9627451 , 0.96078431]),\n", " 'split3_test_score': array([0.90784314, 0.9372549 , 0.9372549 , 0.92941176, 0.92941176,\n", " 0.95882353, 0.95882353, 0.96470588, 0.9627451 , 0.9627451 ,\n", " 0.98627451, 0.98627451, 0.99215686, 0.99411765, 0.99019608,\n", " 0.98823529, 0.98823529, 0.98627451, 0.98431373, 0.98235294,\n", " 0.98039216, 0.97647059, 0.97058824, 0.96862745, 0.96862745,\n", " 0.97254902, 0.97647059, 0.9745098 , 0.9745098 
, 0.97058824,\n", " 0.97647059, 0.9745098 , 0.9745098 , 0.97254902, 0.97254902,\n", " 0.97058824, 0.96862745, 0.96862745, 0.96862745, 0.96862745,\n", " 0.96862745, 0.9745098 , 0.9745098 , 0.97254902, 0.9745098 ,\n", " 0.97647059, 0.97647059, 0.97647059, 0.97843137]),\n", " 'split4_test_score': array([0.9372549 , 0.9372549 , 0.93333333, 0.92941176, 0.9254902 ,\n", " 0.92352941, 0.94901961, 0.94705882, 0.94705882, 0.94705882,\n", " 0.94705882, 0.94705882, 0.94705882, 0.94705882, 0.94705882,\n", " 0.94705882, 0.97058824, 0.97058824, 0.97647059, 0.97647059,\n", " 0.9745098 , 0.9745098 , 0.9745098 , 0.9745098 , 0.9745098 ,\n", " 0.9745098 , 0.97254902, 0.9745098 , 0.97254902, 0.97254902,\n", " 0.97254902, 0.97254902, 0.97254902, 0.97254902, 0.96862745,\n", " 0.96862745, 0.96470588, 0.96470588, 0.96666667, 0.96470588,\n", " 0.96666667, 0.96666667, 0.97058824, 0.97058824, 0.96862745,\n", " 0.96666667, 0.96666667, 0.96470588, 0.96470588]),\n", " 'mean_test_score': array([0.92457516, 0.94481481, 0.95437908, 0.95091503, 0.9516122 ,\n", " 0.9611329 , 0.96472767, 0.96662309, 0.968061 , 0.96840959,\n", " 0.9735512 , 0.97318083, 0.97947712, 0.98145969, 0.9803268 ,\n", " 0.97880174, 0.98422658, 0.98309368, 0.98313725, 0.98309368,\n", " 0.98342048, 0.98224401, 0.98028322, 0.97910675, 0.9787146 ,\n", " 0.98023965, 0.97949891, 0.97991285, 0.98026144, 0.98065359,\n", " 0.98104575, 0.98065359, 0.98028322, 0.9783878 , 0.97723312,\n", " 0.9764488 , 0.97675381, 0.9759695 , 0.97636166, 0.97557734,\n", " 0.9748366 , 0.97601307, 0.97638344, 0.97559913, 0.97559913,\n", " 0.97599129, 0.97599129, 0.97559913, 0.97633987]),\n", " 'std_test_score': array([0.03325211, 0.01872544, 0.01660774, 0.0182333 , 0.02064992,\n", " 0.02482919, 0.01851883, 0.0182006 , 0.02016003, 0.02108635,\n", " 0.02025305, 0.02012114, 0.01809218, 0.01894378, 0.01895695,\n", " 0.01887616, 0.01067725, 0.01131942, 0.01120224, 0.01002235,\n", " 0.00831217, 0.00860608, 0.00951598, 0.01002235, 0.01015531,\n", " 0.00945584, 
0.00955541, 0.00828506, 0.01002709, 0.0105113 ,\n", " 0.00969438, 0.00990881, 0.01004308, 0.01098385, 0.01175462,\n", " 0.01197354, 0.01230688, 0.01254751, 0.0122156 , 0.01273875,\n", " 0.01196711, 0.01158181, 0.01253933, 0.01305082, 0.01316813,\n", " 0.01298256, 0.01298256, 0.0132844 , 0.01373731]),\n", " 'rank_test_score': array([49, 48, 45, 47, 46, 44, 43, 42, 41, 40, 38, 39, 18, 7, 11, 20, 1,\n", " 4, 3, 4, 2, 6, 12, 19, 21, 15, 17, 16, 14, 9, 8, 9, 12, 22,\n", " 23, 25, 24, 32, 27, 36, 37, 29, 26, 33, 33, 30, 30, 33, 28],\n", " dtype=int32)}" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_cv.cv_results_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 用整個 training set 做 fitting. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 剛剛的 grid_cv 物件,再找完最佳參數後,就已經幫你把整個 training set 給 fit 完了" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 用 testing set 做 predict" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "pred_label_test = grid_cv.predict(X_test)\n", "pred_prob_test = grid_cv.predict_proba(X_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 效果評估" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9426699426699426\n", "[[31 6]\n", " [ 3 30]]\n", " precision recall f1-score support\n", "\n", " democrat 0.91 0.84 0.87 37\n", " republican 0.83 0.91 0.87 33\n", "\n", " accuracy 0.87 70\n", " macro avg 0.87 0.87 0.87 70\n", "weighted avg 0.87 0.87 0.87 70\n", "\n" ] } ], "source": [ "# 評估結果\n", "print(grid_cv.score(X_test, y_test))\n", "print(confusion_matrix(y_test, pred_label_test))\n", "print(classification_report(y_test, pred_label_test))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### KNN (multi_class)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 讀資料集" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "* 要來引入數字辨認資料集" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from sklearn import datasets\n", "digits = datasets.load_digits()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 這筆資料,x 分成兩種: \n", " * images: 1797張image x 8 x 8 的 3d-array. \n", " * data: 每張 image 拉成 64 個 column,所以變成 1797x64 的 2d-array\n", "* 看一下 shape 是不是真的是這樣:" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1797, 8, 8)\n", "(1797, 64)\n" ] } ], "source": [ "print(digits.images.shape)\n", "print(digits.data.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 我們可以看一張圖片:" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPUAAAD4CAYAAAA0L6C7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAO3UlEQVR4nO3db0yV9f/H8ReBCaiB55QoIOJRsXRzDDTNclOhPzPmXDdcMt2QG5m6OYZZZC1s5dZU1Nh02GZ4u1tOm7fcdG56R1GmM8ekoem0mBCm/JEdDt8b331dv19/zrk+nHMdePt83GOed58X5avr4nB9zidleHh4WADMeC7ZAQDEF6UGjKHUgDGUGjCGUgPGUGrAmLRkBxhLampqnOZqa2u1f/9+z3Otra1O6/nlyJEj2rRpU7Jj/KuqqirPMxUVFfrxxx99Wy/euFIDxlBqwBhKDRhDqQFjKDVgDKUGjKHUgDGUGjCGUgPGxPREWWtrq5qbmxWJRFRWVqY1a9YkOBYAV1Gv1JFIREePHtXOnTt14MABnT9/Xnfv3vUjGwAHUUvd3t6uqVOnKicnR2lpaVq6dKkuXrzoRzYADqLefnd3dysYDD79OhgM6ubNm3953enTp3X69GlJ0jfffBPHiKNHbW2t01xOTo7TbH9/v9N6fpkxY4aOHDmS7Bj/6s9/d2OVlZWlioqKBKTxR9x2aZWXl6u8vDxe/7hRyWWnlcQurWRil9bfCAQC6urqevp1V1eXAoFAQkMBcBe11LNmzdL9+/fV2dmpcDisCxcuaOHChX5kA+Ag6u13amqqqqurtXv3bkUiEa1YsULTp0/3IxsABzH9TF1SUqKSkpJEZwEQBzxRBhhDqQFjKDVgDKUGjKHUgDGUGjCGUgPGpIz1Q+d7enp8W6u4uNhp7sSJE1q9erXnuV27djmt56KwsNDzTGlpqVpaWuIfJo5cvq9p06bp/v37vq0Xb1ypAWMoNWAMpQaModSAMZQaMIZSA8ZQasAYSg0YQ6kBYyg1YEzUjzM6fPiwLl++rKysLDU0NPiRCcAIRL1SL1++XDt37vQjC4A4iFrqefPmaeLEiX5kARAHcTuhI1nH7vj5P5wTJ044zc2ePdtpNjc312k
9F+PHj/c8k5mZqdLS0gSkiZ/nn3/e88y4ceM0bdq0BKTxx5g/dufx48e+reWyfVJi62UysfUSwJhHqQFjot5+Hzx4UD/99JMePXqkDz/8UGvXrtXKlSv9yAbAQdRS19TU+BADQLxw+w0YQ6kBYyg1YAylBoyh1IAxlBowhlIDxsTt2e9kaW1t9W0t1yN+hoaGnGaPHTvmtJ4LlyOFQqGQjh8/7nnOz2fas7OzneZGwzPcrrhSA8ZQasAYSg0YQ6kBYyg1YAylBoyh1IAxlBowhlIDxlBqwJioj4k+ePBAhw4dUk9Pj1JSUlReXq5Vq1b5kQ2Ag6ilTk1N1YYNGxQKhdTf36+6ujotWLBA+fn5fuQD4FHU2+/JkycrFApJkjIyMpSXl6fu7u6EBwPgxtMurc7OTnV0dGj27Nl/+bNkHbvj57EvZ86ccZqbO3eu02xmZqbTei4yMjI8z+Tk5Ki2ttbzHGezJVbK8PDwcCwvHBgYUH19vd577z0tXrw40blidvbsWd/WWrNmjdPcmTNntGLFCs9zLtshXbmsVVtbq/3793ueGwtbL8eymN79DofDamho0LJly0ZVoQH8VdRSDw8Pq6mpSXl5eaqoqPAjE4ARiPozdVtbm86dO6eCggLt2LFDkrRu3TqVlJQkPBwA76KW+uWXX9YPP/zgRxYAccATZYAxlBowhlIDxlBqwBhKDRhDqQFjKDVgDKUGjBnzZ2lZtnz5ct/WcjmTbHBwULdu3fI8V1VV5XnGlctZX2MdV2rAGEoNGEOpAWMoNWAMpQaModSAMZQaMIZSA8ZQasCYqE+UDQ4Oqr6+XuFwWENDQ1qyZInWrl3rRzYADqKWety4caqvr1d6errC4bC++OILFRcXq6ioyI98ADyKevudkpKi9PR0SdLQ0JCGhoaUkpKS8GAA3MR0QkckEtEnn3yiX3/9VW+//bbWr1//l9ck69idR48e+bZWe3u709zcuXPV1tbmeS4nJ8dpPRd9fX2eZ/Lz83X37t0EpImfvzsiyrqYj92RpN7eXu3bt08bN25UQUFBInPFzPKxOzU1NU7ruXDZpbVv3z599NFH8Q8TR+zSimLChAmaP3++018AAP6IWuo//vhDvb29kv77TvjVq1eVl5eX8GAA3ER99/v333/XoUOHFIlENDw8rNdee83X42MBeBO11DNmzNCePXv8yAIgDniiDDCGUgPGUGrAGEoNGEOpAWMoNWAMpQaModSAMZ42dDzrenp6nOYmTpyox48fe57Lzs52Wm+0Kyws9G2tY8eOeZ4pLS1VS0uL03p+HpX0T7hSA8ZQasAYSg0YQ6kBYyg1YAylBoyh1IAxlBowhlIDxlBqwJiYSx2JRPTxxx/7+kH9ALyLudSnTp3io4GBMSCmUnd1deny5csqKytLdB4AIxT1I4Kl/+50Wb9+vfr7+//xNck6S8tPEydOdJpLTU11nrXoxIkTvq01c+ZMzzOZmZlj+rPto5a6paVFWVlZCoVCun79+j++rry8XOXl5XENN9q4bJ+U2Hr5/61evdq3tZ7FrZdRS93W1qZLly7pypUrGhwcVH9/vxobG7Vt2zY/8gHwKGqpKysrVVlZKUm6fv26Tp48SaGBUYzfUwPGxPRG2f/Mnz9f8+fPT1QWAHHAlRowhlIDxlBqwBhKDRhDqQFjKDVgDKUGjPH0e+pn3Uiexbb6HLcLP5+PPnv2rOeZoqIipzlpdDz7zZUaMIZSA8ZQasAYSg0YQ6kBYyg1YAylBoyh1IAxlBowhlIDxsT0mOjWrVuVnp6u5557TqmpqWY/1xuwIOZnv+vr6/XCCy8kMguAOOD2GzAmZXh4eDjai7Zu3fr02Jg333zzb0/ieBaO3UF8dHR0+LbW+PHjPc+8+OKLevDggdN6ubm5TnPxFFOpu7u7FQgE9PDhQ3399dfauHGj5s2b50c+GFRVVeXbWoWFhZ5nPvjgA3333XdO6+3atctpLp5iuv0OBAKSpKysLC1
atEjt7e0JDQXAXdRSDwwMPD3tcmBgQFevXlVBQUHCgwFwE/Xd74cPH2rfvn2SpKGhIb3xxhsqLi5OdC4AjqKWOicnR3v37vUjC4A44FdagDGUGjCGUgPGUGrAGEoNGEOpAWMoNWAMx+544PrMcn19vb788kvPcwcPHnRaz4WfxwK5PI+N2HGlBoyh1IAxlBowhlIDxlBqwBhKDRhDqQFjKDVgDKUGjKHUgDExPSba29urpqYm3blzRykpKdq8ebOKiooSnQ2Ag5hK3dzcrOLiYm3fvl3hcFhPnjxJdC4AjqLefvf19enGjRtauXKlJCktLU0TJkxIeDAAbqKe0HHr1i0dOXJE+fn5un37tkKhkKqqqpSenv5/XvcsHLvjelxMbm6u7t2753lu+vTpTuu5SEvzb8Oey78LP5k/dufnn3/WZ599pq+++kpz5sxRc3OzMjIy9P777/uVcdRg62V8jIajaf6N+WN3gsGggsGg5syZI0lasmSJrwecAfAmaqmzs7MVDAaf3jJdu3ZN+fn5CQ8GwE1MP0hVV1ersbFR4XBYU6ZM0ZYtWxKdC4CjmEpdWFho9s0vwBqeKAOModSAMZQaMIZSA8ZQasAYSg0YQ6kBYyg1YAxnaXnguukhNTXVaXby5MlO6/nl4sWLWrRokee5rKysBKT5e8ePH/c8M2nSJC1fvjzuWfzClRowhlIDxlBqwBhKDRhDqQFjKDVgDKUGjKHUgDGUGjAm6hNl9+7d04EDB55+3dnZqbVr1+rdd99NaDAAbqKWOjc3V3v37pUkRSIRbdq0Sa+++mrCgwFw4+n2+9q1a5o6dapeeumlROUBMEJRT+j4s8OHDysUCumdd975y589C8fu/PLLL05zOTk5+u233zzPdXZ2Oq3nl1deeUU3btzwPJeampqANH9v9uzZnmcyMzPV19fntN6kSZOc5uIp5lKHw2Ft2rRJDQ0Nvh7RMprU1NQ4zdXW1mr//v2e57799lun9fxidZdWaWmpWlpanNYbDbu7Yr79vnLlimbOnPnMFhoYK2Iu9fnz5/X6668nMguAOIip1AMDA7p69aoWL16c6DwARiimTz5JT0/X999/n+gsAOKAJ8oAYyg1YAylBoyh1IAxlBowhlIDxlBqwBhKDRjjaZcWgNFv1Fyp6+rqkh0hYax+b3xfo9OoKTWA+KDUgDGjptTl5eXJjpAwVr83vq/RiTfKAGNGzZUaQHxQasCYmD4kIdFaW1vV3NysSCSisrIyrVmzJtmRRuzBgwc6dOiQenp6lJKSovLycq1atSrZseImEomorq5OgUBgzP8K6M96e3vV1NSkO3fuKCUlRZs3b1ZRUVGyY3mS9FJHIhEdPXpUn3/+uYLBoD799FMtXLhQ+fn5yY42IqmpqdqwYYNCoZD6+/tVV1enBQsWjPnv639OnTqlvLw89ff3JztKXDU3N6u4uFjbt29XOBzWkydPkh3Js6Tffre3t2vq1KnKyclRWlqali5dqosXLyY71ohNnjxZoVBIkpSRkaG8vDx1d3cnOVV8dHV16fLlyyorK0t2lLjq6+vTjRs3tHLlSklSWlqaJkyYkORU3iX9St3d3a1gMPj062AwqJs3byYxUfx1dnaqo6PD6YPlR6Njx45p/fr15q7SnZ2deuGFF3T48GHdvn1boVBIVVVVSk9PT3Y0T5J+pbZuYGBADQ0NqqqqUmZmZrLjjFhLS4uysrKe3oVYMjQ0pI6ODr311lvas2ePxo8f73QYQLIl/UodCATU1dX19Ouuri4FAoEkJoqfcDishoYGLVu2zMzHK7e1tenSpUu6cuWKBgcH1d/fr8bGRm3bti3Z0UYsGAwqGAxqzpw5kqQlS5ZQahezZs3S/fv31dnZqUAgoAsXLpj4CzI8PKympibl5eWpoqIi2XHiprKyUpWVlZKk69ev6+TJkyb+e0lSdna2gsGg7t27p9zcXF27dm1MvrGZ9FKnpqaqurpau3fvViQS0YoVKzR
9+vRkxxqxtrY2nTt3TgUFBdqxY4ckad26dSopKUlyMvyb6upqNTY2KhwOa8qUKdqyZUuyI3nGY6KAMbxRBhhDqQFjKDVgDKUGjKHUgDGUGjCGUgPG/Ac2++/lXZ87gwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(digits.images[1010], cmap=plt.cm.gray_r, interpolation='nearest');" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 那這個任務,蠻熟悉的,就是給我一張圖片,然後我要辨認出他是 0 ~ 9 的哪個數字" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fit model & predict" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9805555555555555\n", "[[36 0 0 0 0 0 0 0 0 0]\n", " [ 0 36 0 0 0 0 0 0 0 0]\n", " [ 0 0 35 0 0 0 0 0 0 0]\n", " [ 0 0 0 37 0 0 0 0 0 0]\n", " [ 0 0 0 0 36 0 0 0 0 0]\n", " [ 0 0 0 0 0 37 0 0 0 0]\n", " [ 0 0 0 0 0 0 35 0 1 0]\n", " [ 0 0 0 0 0 0 0 36 0 0]\n", " [ 0 3 0 0 0 0 0 1 31 0]\n", " [ 0 0 0 0 1 0 0 0 1 34]]\n", " precision recall f1-score support\n", "\n", " 0 1.00 1.00 1.00 36\n", " 1 0.92 1.00 0.96 36\n", " 2 1.00 1.00 1.00 35\n", " 3 1.00 1.00 1.00 37\n", " 4 0.97 1.00 0.99 36\n", " 5 1.00 1.00 1.00 37\n", " 6 1.00 0.97 0.99 36\n", " 7 0.97 1.00 0.99 36\n", " 8 0.94 0.89 0.91 35\n", " 9 1.00 0.94 0.97 36\n", "\n", " accuracy 0.98 360\n", " macro avg 0.98 0.98 0.98 360\n", "weighted avg 0.98 0.98 0.98 360\n", "\n" ] } ], "source": [ "# 切資料\n", "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " digits.data, \n", " digits.target, \n", " test_size = 0.2, \n", " random_state = 42, \n", " stratify = digits.target\n", ")\n", "\n", "# fit model\n", "from sklearn.neighbors import KNeighborsClassifier\n", "knn = KNeighborsClassifier(n_neighbors = 6)\n", "knn.fit(X_train, y_train)\n", "\n", "# 預測 training/testing set\n", "pred_label_train = knn.predict(X_train)\n", "pred_prob_train = knn.predict_proba(X_train)\n", "\n", "pred_label_test = knn.predict(X_test)\n", "pred_prob_test = knn.predict_proba(X_test)\n", "\n", "# 評估結果\n", "print(knn.score(X_test, y_test))\n", "print(confusion_matrix(y_test, pred_label_test))\n", 
"print(classification_report(y_test, pred_label_test))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### overfitting exploration" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 最後,我們來玩點新的,看看 overfitting 的狀況" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "neighbors = np.arange(1, 9) # knn的 k,從複雜(1)到簡單(9)\n", "train_accuracy = np.empty(len(neighbors)) # 先擺個 placeholder\n", "test_accuracy = np.empty(len(neighbors))\n", "\n", "for i, k in enumerate(neighbors):\n", " \n", " knn = KNeighborsClassifier(n_neighbors = k)\n", " knn.fit(X_train, y_train)\n", " \n", " train_accuracy[i] = knn.score(X_train, y_train)\n", " test_accuracy[i] = knn.score(X_test, y_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 畫個圖看看" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots()\n", "ax.plot(neighbors, train_accuracy, label = \"train\");\n", "ax.plot(neighbors, test_accuracy, label = \"test\");\n", "ax.legend();\n", "ax.set(\n", " xlabel='Number of Neighbors', \n", " ylabel='Accuracy',\n", " title='k-NN: Varying Number of Neighbors'\n", ");" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Logistic (binary)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 讀資料集" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pregnanciesglucosediastolictricepsinsulinbmidpfagediabetes
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
..............................
76310101764818032.90.171630
76421227027036.80.340270
7655121722311226.20.245300
7661126600030.10.349471
7671937031030.40.315230
\n", "

768 rows × 9 columns

\n", "
" ], "text/plain": [ " pregnancies glucose diastolic triceps insulin bmi dpf age \\\n", "0 6 148 72 35 0 33.6 0.627 50 \n", "1 1 85 66 29 0 26.6 0.351 31 \n", "2 8 183 64 0 0 23.3 0.672 32 \n", "3 1 89 66 23 94 28.1 0.167 21 \n", "4 0 137 40 35 168 43.1 2.288 33 \n", ".. ... ... ... ... ... ... ... ... \n", "763 10 101 76 48 180 32.9 0.171 63 \n", "764 2 122 70 27 0 36.8 0.340 27 \n", "765 5 121 72 23 112 26.2 0.245 30 \n", "766 1 126 60 0 0 30.1 0.349 47 \n", "767 1 93 70 31 0 30.4 0.315 23 \n", "\n", " diabetes \n", "0 1 \n", "1 0 \n", "2 1 \n", "3 0 \n", "4 1 \n", ".. ... \n", "763 0 \n", "764 0 \n", "765 0 \n", "766 1 \n", "767 0 \n", "\n", "[768 rows x 9 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "diabetes = pd.read_csv(\"data/diabetes.csv\")\n", "diabetes" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 768 entries, 0 to 767\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 pregnancies 768 non-null int64 \n", " 1 glucose 768 non-null int64 \n", " 2 diastolic 768 non-null int64 \n", " 3 triceps 768 non-null int64 \n", " 4 insulin 768 non-null int64 \n", " 5 bmi 768 non-null float64\n", " 6 dpf 768 non-null float64\n", " 7 age 768 non-null int64 \n", " 8 diabetes 768 non-null int64 \n", "dtypes: float64(2), int64(7)\n", "memory usage: 54.1 KB\n" ] } ], "source": [ "diabetes.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fit, predict, and evaluate" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:372: FitFailedWarning: \n", "75 fits failed out of a total of 150.\n", "The score on these train-test partitions for these parameters will be set to nan.\n", "If these failures are not expected, you can try to debug them by setting error_score='raise'.\n", "\n", "Below are more details about the failures:\n", "--------------------------------------------------------------------------------\n", "75 fits failed with the following error:\n", "Traceback (most recent call last):\n", " File \"/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", " estimator.fit(X_train, y_train, **fit_params)\n", " File \"/Volumes/GoogleDrive/我的雲端硬碟/0. 
codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", " solver = _check_solver(self.solver, self.penalty, self.dual)\n", " File \"/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py\", line 447, in _check_solver\n", " raise ValueError(\n", "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.\n", "\n", " warnings.warn(some_fits_failed_message, FitFailedWarning)\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/model_selection/_search.py:969: UserWarning: One or more of the test scores are non-finite: [ nan 0.72391304 nan 0.7673913 nan 0.77608696\n", " nan 0.7826087 nan 0.77391304 nan 0.78913043\n", " nan 0.78043478 nan 0.78043478 nan 0.77391304\n", " nan 0.77391304 nan 0.76956522 nan 0.77391304\n", " nan 0.7673913 nan 0.77608696 nan 0.77391304]\n", " warnings.warn(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Tuned Logistic Regression Parameters: {'C': 0.4393970560760795, 'penalty': 'l2'}\n", "Best score is 0.7891304347826086\n", "[[169 32]\n", " [ 47 60]]\n", " precision recall f1-score support\n", "\n", " 0 0.78 0.84 0.81 201\n", " 1 0.65 0.56 0.60 107\n", "\n", " accuracy 0.74 308\n", " macro avg 0.72 0.70 0.71 308\n", "weighted avg 0.74 0.74 0.74 308\n", "\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "0.7007718417259498\n" ] } ], "source": [
 "# split data\n",
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# preprocess (not used)\n",
 "# from sklearn.impute import SimpleImputer\n",
 "# from sklearn.pipeline import Pipeline\n",
 "\n",
 "# modeling\n",
 "from sklearn.linear_model import LogisticRegression\n",
 "\n",
 "# hyper-parameter tuning\n",
 "from sklearn.model_selection import GridSearchCV\n",
 "\n",
 "# evaluation\n",
 "from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score\n",
 "\n",
 "\n",
 "# split data: 40% held out for testing, stratified on y so both sets keep the class ratio\n",
 "X = diabetes.drop(\"diabetes\", axis = 1)\n",
 "y = diabetes[\"diabetes\"]\n",
 "X_train, X_test, y_train, y_test = train_test_split(\n",
 "    X,\n",
 "    y,\n",
 "    test_size = 0.4,\n",
 "    random_state = 42,\n",
 "    stratify = y\n",
 ")\n",
 "\n",
 "# model\n",
 "# The grid below searches both 'l1' and 'l2'. The default lbfgs solver rejects 'l1'\n",
 "# (every l1 fit raised ValueError and scored nan), so use liblinear, which supports both.\n",
 "# max_iter is raised because no feature scaling is applied in this cell and the recorded\n",
 "# run emitted ConvergenceWarning.\n",
 "logreg = LogisticRegression(solver = \"liblinear\", max_iter = 1000)\n",
 "\n",
 "# hyper-parameter tuning\n",
 "# C is the inverse of the regularization strength:\n",
 "#   larger C means weaker regularization, so the model overfits more easily;\n",
 "#   smaller C means stronger regularization, so the model underfits more easily.\n",
 "c_space = np.logspace(-5, 8, 15)\n",
 "param_grid = {\"C\": c_space, \"penalty\": ['l1', 'l2']}\n",
 "logreg_cv = GridSearchCV(logreg, param_grid, cv=5)\n",
 "logreg_cv.fit(X_train, y_train)\n",
 "print(\"Tuned Logistic Regression Parameters: {}\".format(logreg_cv.best_params_))\n",
 "print(\"Best score is {}\".format(logreg_cv.best_score_))\n",
 "\n",
 "# predict on the test set: hard labels for the confusion matrix / report,\n",
 "# positive-class probabilities for ROC / AUC\n",
 "y_pred = logreg_cv.predict(X_test)\n",
 "y_pred_prob = logreg_cv.predict_proba(X_test)[:,1]\n",
 "\n",
 "\n",
 "# evaluate\n",
 "# AUC is computed from the predicted probabilities; feeding hard class labels to\n",
 "# roc_auc_score would collapse the ROC curve to a single threshold.\n",
 "print(\"AUC: {}\".format(roc_auc_score(y_test, y_pred_prob)))\n",
 "print(confusion_matrix(y_test, y_pred))\n",
 "print(classification_report(y_test, y_pred))\n",
 "\n",
 "fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n",
 "plt.plot([0,1],[0, 1], \"k--\")\n",
 "plt.plot(fpr, tpr, label = \"Logistic Regression\")\n",
 "plt.xlabel(\"False Positive Rate\")\n",
 "plt.ylabel(\"True Positive Rate\")\n",
 "plt.title(\"Logistic Regression ROC Curve\")\n",
 "plt.legend()  # without this the label passed to plt.plot is never drawn\n",
 "plt.show()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* precision & recall\n", " * precision = 你說是 positive 的人中,有多少個是真的 positive\n", " * recall = true positive rate = 真的是positive的人中,你揪出了幾個?\n", " * cutpoint -> 1, precision ->1,因為惜字如金,只要我說是positive,幾乎一定是 positive。\n", " * cutpoint -> 0, recall -> 1, 因為cutpoint接近0的時候,幾乎所有人全被說 positive,那recall的分子就幾乎全中了" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DecisionTree (binary)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tuned Decision Tree Parameters: {'criterion': 'entropy', 'max_depth': 3, 'max_features': 5, 'min_samples_leaf': 5}\n", "Best score is 0.7608695652173914\n", "AUC: 0.7851629701957501\n", "[[174 27]\n", " [ 57 50]]\n", " precision recall f1-score support\n", "\n", " 0 0.75 0.87 0.81 201\n", " 1 0.65 0.47 0.54 107\n", "\n", " accuracy 0.73 308\n", " macro avg 0.70 0.67 0.67 308\n", "weighted avg 0.72 0.73 0.71 308\n", "\n" ] }, { "data": { "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEaCAYAAAD+E0veAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABOOUlEQVR4nO3deXxM1/vA8c/NZCeJLCJCGhH7UlvUTkLQhVItSi2p7rQUse9bqa1qKYoGbRVtUW1/lga174SiKok9YkmCJLJIMuf3R2q+UhITkkyW5/169VV35s69z8kk88y955znaEophRBCCAGYmToAIYQQ+YckBSGEEAaSFIQQQhhIUhBCCGEgSUEIIYSBJAUhhBAGkhQEABcvXkTTNPbs2fNMx/H19eXdd9/NoagKpz///BNN07h69aqpQxHiEZIUCoCAgAD8/f1z9RweHh5ERkbSoEEDo/afPHky5cqVe+TxdevWMXv27KeO40FyevCfvb09devW5dtvv33qY+Y3jRs3JjIyEnd391w/V0BAgOFnqdPpKFu2LL169SIiIuKRfcPDwwkICKBMmTJYWlri7u5O7969CQ8Pf2TfhIQEJk+ezPPPP4+trS1OTk40aNCAefPmkZCQkGVMp0+fpmfPnpQpUwYrKys8PT157bXX2LFjR461Wzw9SQoCAJ1Oh5ubGxYWFs90HCcnJ+zt7Z85nl9++YXIyEiOHTtGp06d6NWrF1u3bn3m4z7J/fv3c/0clpaWuLm5YWaWN39+zZo1IzIyksuXL7Nq1SqOHz9O586dM+xz/PhxfHx8uHr1KqtWrSIsLIzVq1dz7do1fHx8CAkJMewbGxtLkyZNmDdvHv369WPfvn0cPXqUwMBA1q5dm+X7tGXLFnx8fLh27RpLly7lzJkz/PrrrzRs2JAPPvjgmdqZF+9dkaBEvte7d2/VqlWrTJ8/e/asevnll1WxYsVUsWLFVLt27VRoaGiGfVatWqXKly+vrKysVKNGjdSvv/6qALV7926llFIXLlzIsK2UUlOmTFFeXl7K0tJSubi4qDZt2qiEhAQVFBSkgAz/jRs3TimlVIsWLdQ777yT4dzz589XVatWVZaWlqpkyZKqU6dOmbblcXEopZSTk5MaNGiQYTsuLk71799fubu7KxsbG1W7dm31888/Z3jNsWPHVIMGDZSlpaWqUKGCWrt2rfL09FSTJk0y7AOoL7/8UnXr1k3Z29urLl26KKWU2rp1q2rcuLGytrZW7u7uKiAgQEVFRRled+rUKdWmTRvl4OCgbG1tVZUqVdTKlSsNzy9ZskRVqVJFWVlZKUdHR9WsWTN15coVpZRSO3bsUIBhWyml9u/fr5o1a6asra1ViRIlVLdu3dSNGzcMz48bN055e3urDRs2qMqVKytbW1vVokULde7cuUx/lko9/ndn7ty5ClB3795VSiml1+vV888/r2rWrKlSUlIy7JuSkqJq1KihatWqpfR6vVJKqY8//lhZW1ur8+fPP3I+vV6vbt++/dhY7t27p1xdXdWLL7742OdjYmIM/wbUt99+m+H5Vq1aqd69exu2PT091ahRo9RHH32knJyc1AsvvKC6d++uWrdu/cixX3zxRfXWW28Ztp/0/hZlkhQKgKySQkJCgnruuedUy5Yt1ZEjR9SRI0eUr6+v8vb2VsnJyUoppY4cOaI0TVOjRo1SZ8+eVevXr1fe3t5ZJoWff/5Z2dnZqY0bN6pLly6p48ePqy+++EIlJCSohIQENWzYMFW2bFkVGRmpIiMjVVxcnFLq0aQwduxYVaxYMTVv3jz1zz//qKNHj6rJkydn2tb/xpGamqp++OEHBajhw4crpdI/eHx9fVWLFi3U7t27VXh4uFq8eLGysLBQwcHBSqn0DyA3NzfVrl07deLECbV//37VqFEjZWNj80hScHJyUvPmzVNhYWHq3Llzatu2bcrGxkbNnTtXnTt3Th06dEj5+vqq5s2bGz4Ya9asqbp166ZOnz6
twsPD1f/93/+pX3/91fDz1ul0asWKFerixYvq5MmTasmSJZkmhcjISGVnZ6e6deumTp48qXbv3q1q1qypmjVrZohz3LhxytbWVrVt21YdOXJEhYSEqLp166qmTZtm63cnIiJCNW/eXOl0OhUfH6+UUiokJOSxH8IPrFy5UgHqxIkTKi0tTTk6Oj6S+I2xfv36xyb8xzE2KdjZ2alx48apf/75R50+fVpt2bJFmZmZqYiICMN+165dUzqdTm3ZskUppYx6f4sySQoFQFZJYenSpcrGxkbdunXL8Nj169eVtbW1WrFihVJKqe7duz/y4bFw4cIsk8Ls2bNVxYoV1f379x973kmTJilPT89HHn84KcTHxytra2s1Y8YMo9v6IA4bGxtVrFgxpdPpFKBKliypwsPDlVLpH6pWVlbqzp07GV779ttvqw4dOiillPr6669VsWLFMuzz999/K+CRpNCnT59H2jBs2LAMj126dEkB6vjx40oppezt7VVQUNBj27Bu3Tplb29v+Cb+X/9NCqNHj1ZlypQxJHGl/vdBvXPnTqVUelLQ6XTq5s2bhn1Wr16tNE1TiYmJjz2PUum/OzqdThUrVkzZ2NgYruwGDx5s2GfNmjUKUMeOHXvsMY4ePaoAtXbtWnXjxg0FqFmzZmV6zsx8/vnnClDR0dFP3NfYpNCyZcsM+6SlpSl3d3c1ffp0w2MzZsxQZcqUUWlpaUop497fokz6FAq406dPU61aNVxcXAyPlSpVisqVK3P69GkAzpw5Q8OGDTO8rlGjRlket0uXLqSkpODp6UlAQADffvstcXFx2Y4tKSmJNm3aZOt1AEFBQYSEhLBp0yZq1KjBwoULKV++PACHDx/m/v37lClThuLFixv+++677wgNDQXS21y1alUcHBwMx6xSpQolSpR45FwvvPBChu3Dhw8zZ86cDMeuVq0agOH4gYGBvPvuu/j6+jJ+/HiOHTtmeH3r1q0pX748Xl5evPnmm3z99ddERUVl+XNq2LAhlpaWhsdq1aqFg4OD4T0EcHd3p2TJkhm2lVLcvHkzy59lgwYNCAkJ4dChQ4wZM4ZGjRoxefLkLF+TGfUM9TOf5bWZ+e97Z2ZmRo8ePTIMTPj222956623DH04xry/RZkkhSJC07Rs7V+mTBnOnj3LN998g6urK5MmTaJy5cpcuXIllyJ89PwVKlSgdevWrF27lj59+nDu3DkA9Ho9Dg4OhISEZPjvzJkzbNq0yXAMY9tcrFixDNt6vZ5hw4Y9cvzQ0FBeeuklAMaMGcO5c+fo0qULp06domHDhowePRqA4sWLc+TIEdavX0+lSpVYtGgRFSpU4OjRo8/0M3k4aTzcPr1en+XrbGxsqFChAjVq1GDixIl4eXnxySefGJ6vVKkSAKdOnXrs6x8kpsqVK1OyZEkcHR05c+ZMtuOvXLkygFGv1TTtkSSSkpLyyH7/fe8AevXqxV9//WV4306ePEnv3r0Nzxvz/hZlkhQKuOrVq3PmzJkM30Rv3LjBP//8Q40aNQCoVq0a+/fvz/C6AwcOPPHYVlZWvPjii0yfPp2//vqLhIQENmzYAKR/QKWlpWX5+mrVqmFtbf3Mo4aqVq3Kq6++SmBgIAA+Pj7cuXOHpKQkKlSokOG/5557znDuv//+m7t37xqO888//3Dnzp0nns/Hx4fTp08/cuwKFSpQvHhxw37ly5enb9++/PTTT0ycOJGFCxcantPpdDRv3pyJEydy9OhRSpcuzapVqx57vurVq3PgwIEMo2dOnDjB3bt3De9hTho/fjxBQUEcOXIESL8qqVGjBjNmzCA1NTXDvqmpqcyYMYPnn3+emjVrYmZmRvfu3fn++++5cOHCI8dWSmX4mT+sTZs2uLq6MmXKlMc+f/v2bcO/XV1duXbtmmE7OTnZ6ERUvXp16tWrx7fffsvKlSupV6+e4UoAjH9/iypJCgVEfHz8I99szp49S/fu3SlZsiRdu3bl2LFjHD16lDf
ffJMyZcrQtWtXAAYNGsTevXsZO3Ys586dY+PGjcyaNQvI/Nv0smXLWLJkCSdOnODSpUt8//33xMXFGf64vLy8uH79Ovv37ycqKuqxY9OLFy/O4MGDGT9+PAsWLODcuXOcOHGCqVOnZrv9gYGB/Prrr+zfv5+WLVvi7+9Pp06d2LBhA+fPn+fo0aPMmzePJUuWAPDWW29RvHhxevXqxcmTJzl48CDvvPMONjY2T7yCmDhxIr/88guDBg0iJCSE8PBwNm/ezDvvvENiYiLx8fH069eP7du3c+HCBY4fP87mzZsNP5tffvmFL774gqNHj3L58mU2bNjAlStXMnwwPezjjz8mNjaWgIAATp06xZ49e+jZsyfNmjWjWbNm2f5ZPUnFihVp3749o0aNAtJ/B5YvX86lS5d46aWX2LVrF1euXGH37t28/PLLXL58meXLlxt+blOmTKFixYo0bNiQr7/+mhMnTnDhwgXWr19PixYtMp1vYGtry/Lly9mxYwf+/v5s2rSJ8+fP89dffzFz5swMtzj9/f1ZtGgR+/fv59SpUwQEBGRryGmvXr1YtWoVP/zwQ4arBHjy+1vkmbRHQxild+/ejwwBBVTlypWVUulDUl966SXDkNRXXnkl0yGplpaWqmHDhobOxSNHjiilHj/6qFGjRqpEiRLKxsZGVa9eXS1dutRwvPv376tu3bopR0fHLIek6vV6NWfOHFWpUiVlYWGhXF1d1RtvvJFpWzMbkqqUUq1bt1YtWrRQSinDCKhy5copCwsLVapUKdW2bVu1bds2w/7/HZL6448/qpIlS6qZM2ca9iGTUTe7du1SrVq1UsWLFzcMOR0wYIBKSUlRiYmJqlu3bqpcuXLKyspKlSxZUnXp0kVdvnxZKaXUzp07lZ+fn3JxcVFWVlaqQoUKaurUqYZjP2lIqoODQ6ZDUh+2e/duBagLFy5k+vPMbJDC3r17FaB27NhheOzcuXOqV69eqnTp0src3Fy5ubmpXr16qbCwsEdeHx8fryZMmKBq1KhhGEb7wgsvqPnz56uEhIRM41FKqZMnT6ru3bur0qVLKwsLC+Xh4aFee+01Q6e6Uukjstq1a6fs7OxU2bJl1VdfffXYjuaHBw087NatW8rCwkJZWFhkGITxQFbvb1GnKSUrrxVFK1eu5O233yY6Ovqxna+F0aVLlyhXrhwbN26kffv2pg5HiHzJ3NQBiLwxc+ZM/Pz8cHJy4vDhwwwbNozOnTsX6oTw3XffUaZMGby8vLh06RJDhw7F09PzqUZDCVFUSFIoIk6ePMmsWbOIiYnBw8ODHj16MGHCBFOHlauio6MZN24cERERODk50aRJE3788UesrKxMHZoQ+ZbcPhJCCGEgo4+EEEIYSFIQQghhUOD7FB6e4JIdLi4uWZYeKIykzUWDtLloeJY2Z7WWh1wpCCGEMJCkIIQQwkCSghBCCANJCkIIIQwkKQghhDDIk9FHX331FceOHcPBwcFQnfNhSimCgoI4fvw4VlZW9O3b17CgihBCiLyTJ1cKvr6+jBw5MtPnjx8/zvXr15k7dy7vv/8+S5cuzYuwhBBC/EeeXClUq1YtyyUDjxw5QvPmzdE0jUqVKnHv3j1u376No6NjXoQnhCgklFKQnAhxsRB3F+JiUfF30/+dnGTq8HJESkoKiQmJOLR/HRxdc/z4+WLyWkxMTIY1hp2dnYmJiXlsUggODiY4OBiAadOmZXhddpibmz/1awsqaXPRUJjarPR6VHwc+tjb6GPvoL97J/3/sbdRsXf/few2MXF3UXdvo4+9CylZLMaTzWVp8xulwEzpKQakVq+JS9vHL9z0LPJFUsgOf39//P39DdtPO6NPZkAWDdLm/EWlpkB8HMT/+y3+32/zxN+F+Ie249K3iY8Dlcka1NY2YOcAxe2xdC6JKu2BVtz+38cc0OzsDc9jZw9WT151L7+6e/cukydPZtWqVZQrV46ZM2fSvm37XJnRnC+SgpOTU4bGRUd
H4+TkZMKIhBDGUMlJ6R/ecQ99yMc/+KB/sP2/50m89/gDaRoUKw7FHdI/wEuXRSv+0Ae63b8f8sUdDB/0moWF4eWO+TgRPqu0tDQ6dOhAeHg4ffv2ZdCgQdjY2OTa+fJFUvDx8WHz5s00adKE0NBQbG1tpT9BiHxGxd5BHdmDOroPom6kf/hntm6yzjzjB7pnhX+30z/0tX+/zT94nmLF0cx0edugfO7BLXSdTsewYcNwd3enVq1auX7ePEkKc+bM4cyZM8TFxfHhhx/SpUsXUlNTAWjTpg116tTh2LFj9O/fH0tLS/r27ZsXYQkhnkAlJaJCDqAO7oIzx0GvhzKeaJVr/O9Dvvi/H/L/fuBT3B5sihXYWzWmppRi3bp1jB07lpEjR/LWW2/x0ksv5dn58yQpfPrpp1k+r2ka7777bl6EIoR4ApWaCqePow7tRIUchPvJ4FQSre1raC+0QCtbztQhFloREREMHz6c7du3U7duXerXr5/nMeSL20dCCNNSej2En01PBEf2pHfwFrNDa+SH9kILqFAVzUwKIOSmDRs2MGzYMNLS0pgwYQJvv/02Ol3e31KTpCBEEaYiLqEO7kQd2gXRN8HSEq1WA7QGLaB6HTRziycfROQIBwcH6tSpw/Tp03nuuedMFockBSGKGBV9C3V4F+rgTrh6EczMoGottA5vodVpgGZta+oQi4TU1FSWLFnC/fv3GTBgAH5+fvj6+pq8L0aSghBFgLoXhzq6Nz0RnDud/qBXJbQ330er3wTNXkb75aXTp08TGBjIyZMnad++PUopNE0zeUIASQpCFFoqORl18jDq4J9w6hikpYJbGbQO3dM7jF1LmzrEIic5OZkvv/ySBQsWUKJECRYvXswrr7ySL5LBA5IUhChEVFoaySEH0W/diDp2IL0OkIMTWstX0Br4wnPl89UHUFFz4cIFvvrqKzp27Mi4cePy5SRdSQpCFHBKKbgYmt5hfHg3d2LvgI0tmk+T9A7jyjVkYpgJ3bt3jy1bttCpUyeqVKnCzp078fT0NHVYmZKkIEQBpa5H/DtyaCfcjARzc3i+Pg7+7YkrVwnNwtLUIRZ5u3btYujQoVy9epWaNWtSsWLFfJ0QQJKCEAWKuhODOrw7vcP4Ulh6zaDKNdFeegOtbiM02+JYu7gQX0jrABUUd+7cYdKkSaxevZry5cvz888/U7FiRVOHZRRJCkLkcyrhHur4/vREcPav9Kqhz3mjde6DVr8ZmqOzqUMUD0lLS6Njx46cP3+ejz/+mIEDB2JtbW3qsIwmSUGIfEilpMBfR9Af2gknDkNqCpR0Q3ulc/rIodJlTR2i+I+YmBhKlCiBTqdj+PDhlClThpo1a5o6rGyTpCBEPqH0ejh3Kr2f4Oi+9DLTdg5ozduivdAcyleWkUP5kFKKn376ifHjxzNixAh69OjBiy++aOqwnpokBSFMSCkFV87/22G8G+5Epy8GU6chWoPmULU2mgnq3wjjXL16lWHDhvHnn3/i4+NDw4YNTR3SM5OkIIQJqFvXUYf+LTUReQV0OqhRD63z2+m1h6ysTB2ieIKff/6ZESNGoJRi8uTJ9O7dG7NCUDRQkoIQeUTF3U1fpObgTgg/m/5gxWpob32UPqeguL1pAxTZ4uzsTP369fn8888pW7bw9PFIUhAiF6UvUnMwPRE8vEhNp15oLzRHc3Y1dYjCSCkpKSxevJiUlBQGDhyIr68vLVq0KHT9PJIUhMhhKjUVzhxP7ycwLFLjgtbmNbQGskhNQXTq1CkGDx7MqVOn6NChQ74qYJfTJCkIkQOUXg/nz6YngocXqWnol15qQhapKZCSkpL44osvWLhwIU5OTixZsoSXX37Z1GHlKkkKQjwDFXEZdfBPWaSmkLp48SKLFy/mjTfeYOzYsZQoUcLUIeU6SQpCZJOKufW/kUOySE2hc+/ePTZt2sQbb7xBlSpV2LVrl0lXQstrkhSEMIIsUlM0/PnnnwwdOpRr165Rq1Y
tKlasWKQSAkhSECJT6n4y6sTh9Cqkfx2VRWoKsZiYGCZMmMBPP/1EhQoVWL9+fYEpYJfTJCkI8RCVlgZnT6b3Exw/AEmySE1h96CA3cWLF+nfvz8DBgwoUAXscpokBVHk/XeRGh4sUlNPFqkpzKKjo3F0dESn0zFq1CjKlClDjRo1TB2WyUlSEEVWZovUmDVoATV9ZJGaQkopxdq1a5kwYQIjRoygZ8+etG3b1tRh5RuSFESRYswiNaLwunLlCkOHDmXXrl00aNCAxo0bmzqkfEeSgij00hepOYA6+KcsUlOE/fTTT4wYMQJN0/jss8/o2bNnoShgl9MkKYhCSaWkwKmj3Dm+H/3hPf9ZpKY5WmkPU4co8ljJkiVp2LAh06ZNo0yZMqYOJ9+SpCAKDcMiNYd2oY7uhYR73LcvIYvUFFEpKSl89dVX6PV6Bg4cSIsWLWjRooWpw8r3JCmIAi19kZoL/3YY73pkkRqXZq2Ivn3H1GGKPPbXX38xaNAgzpw5w2uvvWYoYCeeTJKCKJBUQjxq1xbUvu1ZLlKj6eRXvChJTEzkiy++YNGiRTg7O7Ns2bICvTSmKeTZX0xISAhBQUHo9XpatWpFx44dMzwfFRXFggULuHfvHnq9nu7du1O3bt28Ck8UECr6Fip4I2r3VkhOhAqySI34n8uXL/P111/TpUsXRo8eXSQK2OW0PEkKer2eZcuWMXr0aJydnRkxYgQ+Pj4ZViv6+eefadSoEW3atOHq1atMnTpVkoIwUFcuoLasS59cpmnpo4bavIbm4WXq0ISJxcXF8X//93+8/PLLVK5cmT179hSqldDyWp4khbCwMNzc3ChVqhQAjRs35vDhwxneOE3TSEhIACAhIQFHRykwVtQppeDvEPRb1sOZkPS+glbt0fxfRXMqaerwRD6wbds2hg8fzvXr19m+fTsVK1aUhPCM8iQpxMTE4Oz8v7Hgzs7OhIaGZtinc+fOTJ48mc2bN5OcnMyYMWMee6zg4GCCg4MBmDZtGi4uLk8Vk7m5+VO/tqAqKG1Wqakk7d1Gwi+rSL0QipmjM7Y9PsTmxdcwK2aXrWMVlDbnpKLQ5qioKIYMGcKqVauoWrUqP/74Iz4+PqYOK0/l1vucb3rh9u7di6+vL+3bt+fcuXPMmzePWbNmPTK5xN/fH39/f8N2VFTUU53PxcXlqV9bUOX3NqukRNSerag/NkLMLSjtgdb7E2jgS6KFBYmJyZCYnK1j5vc254bC3ua0tDR8fX25fPkyAwcO5JNPPqFMmTKFus2P8yzvs7u7e6bP5UlScHJyIjo62rAdHR2Nk5NThn22b9/OyJEjAahUqRIpKSnExcXh4OCQFyEKE1J3b6O2/YrauQkS7kGl6ph1/xBq1pMlLIXBrVu3cHZ2RqfTMWbMGMqWLUu1atVMHVahkyd/cd7e3kRGRnLz5k1SU1PZt2/fI5d6Li4unDp1CoCrV6+SkpKCvb2MJinMVORV9CvmoR/+DmrzOqhaC7MRM9ANmYpWq74kBAGk9y398MMPNG/enO+++w6ANm3aSELIJXlypaDT6ejTpw9TpkxBr9fj5+eHh4cHa9aswdvbGx8fH3r16sXixYv5/fffAejbt69MNimElFIQegb91vVw4hBYWKI1bY3WugOaa+aXtKJounTpEkOGDGHv3r00atSIZs2amTqkQk9TSilTB/Esrl279lSvK+z3XR/HlG1W+jQIOYh+8zq4cA6K26H5vZL+n13u3SKU97ngWrt2LSNHjkSn0zF69GjeeuutTAvYFZY2Z0eB7lMQRZe6n4zatx31x4b0NQtKuqF1/xCtcSvDrGMhHsfNzY0mTZowderULD/ERM6SpCByhdLrUZt/RgVvhLi7UK4iZh8OgzoNZRUz8Vj3799nwYIF6PV6Bg8eTPPmzWnevLmpwypyjE4KJ0+eZO/evdy9e5fhw4cTHh5OYmKiLF8nHkutW4Hash5q+mDWthNUqi59RCJTISEhDB4
8mLNnz/L6669LATsTMmp4x6ZNm1iyZAmlS5fm77//BsDS0pLVq1fnanCiYNLv3orash7N72V0/ceiVa4hf+DisRITE5k4cSLt27fnzp07BAUFMXfuXPl9MSGjksL//d//MWbMGDp27Gjo6ClTpsxTd/KKwkudPYn6fiFUr4PW9T1ThyPyucuXLxMUFET37t3ZsWMHbdq0MXVIRZ5Rt48SExMfmU6dmpqKubl0SYj/Udcj0C+cBq7umL0/FE0nfQfiUbGxsWzatImuXbsaCtjJSmj5h1FXClWrVmXDhg0ZHtu0aRPVq1fPjZhEAaTiY9HPmwg6HWafjEGzLWbqkEQ+FBwcjJ+fH4GBgYSFhQFIQshnjEoKffr04dChQ/Tr14+kpCQGDBjA/v376d27d27HJwoAlZqSfoUQE4VZ35FoJd1MHZLIZ6Kjo/n444/p3bs3JUqUYOPGjVSoUMHUYYnHMOr+j6OjI1OnTiU8PNxQf6RChQqZTiQRRYdSCvXdV3DuFNq7g9EqVDV1SCKfSUtLo2PHjly5coXAwED69euHpaWlqcMSmTDqU3369OlomkaFChVo1KgRlSpVwszMjJkzZ+Z2fCKfU1vWofZuQ2v/JmYNZFF08T83b95Er9ej0+kYO3YsmzdvZuDAgZIQ8jmjksLp06ez9bgoGtSx/ah1K9NXQWvfzdThiHxCr9fz7bff0qxZM7799lsAWrduTZUqVUwcmTBGlreP1qxZA6SPNHrw7wdu3LhByZKy+lVRpS6FoV82C7wqob09QMaVCwAuXLjAkCFD2L9/P02aNMHX19fUIYlsyjIpPFgDQa/XZ1gPAdKLMXXp0iX3IhP5lrodjX7+ZLArgVm/kWgWcjtApH+JHDlyJBYWFsyYMYNu3brJl4UCKMuk0LdvXyB90ZuHVzsTRZdKSkQ/fxIkJWI2fDqavaylLdK5u7vTokULpkyZQunSpU0djnhKRo0+epAQEhMTiYuL4+Fq26VKlcqdyES+o/R69Mtmw5WL6XMRyniaOiRhQsnJycyfPx+9Xs+QIUNo1qyZrHdQCBiVFK5evcrcuXO5dOnSI8/9t69BFF5q3QoIOYj25vtoNeuZOhxhQseOHSMwMJB//vmHzp07SwG7QsSo0UdLly6levXqfPPNN9ja2hIUFETr1q3p169fbscn8omHi9yZtWpn6nCEiSQkJDB+/HheffVVYmNjWbFiBXPmzJGEUIgYlRQuXbrEW2+9RbFixVBKYWtrS48ePeQqoYiQInfigatXr7Jy5Up69uzJjh07pK+xEDLq9pGFhQVpaWmYm5tjZ2dHVFQUxYoVIz4+PrfjEyYmRe7E3bt3+f333+nevTuVKlViz549shJaIWZUUqhSpQr79+/H19eXhg0b8tlnn2FhYSEF8Qo5KXIntmzZwogRI4iKiuKFF16gQoUKkhAKOaOSwqBBgwz/7tatGx4eHiQlJdGihZQ1KKwyFLkbPFmK3BUxUVFRjBkzho0bN1K1alWCgoKkgF0Rke0FEczMzGjevDmpqakEBwfz4osv5kZcwoTSi9wtlCJ3RVRaWhodOnTg2rVrDB06lL59+2JhYWHqsEQeeWJS+Ouvv7h48SJubm7Ur1+ftLQ0tmzZwi+//ELx4sUlKRRC6UXugtHaSZG7ouT69eu4urqi0+mYOHEiHh4eVKpUydRhiTyWZVLYsGEDP//8Mx4eHly5coW2bdty+vRpLCws+OCDD6hbt25exSnySIYid69Kkbui4EEBu88++4wRI0YQEBBAq1atTB2WMJEsk0JwcDATJkygfPnynDt3jjFjxtCrVy9eeeWVvIpP5KEMRe4C+svY8yIgPDycoUOHcuDAAZo1a0bLli1NHZIwsSyTQlxcHOXLlwfS6x9ZWFjw8ssv50lgIm89UuTO0srUIYlc9sMPPzB69GisrKyYPXs2Xbp0kS8C4sl9CkopQ62jB51Ner3e8LysvlbwSZG7oqls2bL4+fkxZcoUqWE
mDLJMCklJSbz55psZHvvvtsxqLtikyF3RkZyczJw5cwAYNmyYFLATj5VlUpg/f35exSFMRIrcFQ2HDx8mMDCQsLAw3nzzTSlgJzKVZVKQldUKN0ORO9+X0VrK4IHC6N69e3z++ed88803uLu78/3338tqaCJL2Z689rRCQkIICgpCr9fTqlUrOnbs+Mg++/bt48cff0TTNDw9PRkwYEBehVfkGIrcVauD9uZ78q2xkIqIiOC7774jICCA4cOHU7x4cVOHJPK5PEkKer2eZcuWMXr0aJydnRkxYgQ+Pj6ULVvWsE9kZCQbNmxg0qRJFC9enLt37+ZFaEVShiJ3H0iRu8Lm9u3bfPfdd/To0YNKlSqxb98+3NykTIkwTp4MHQoLC8PNzY1SpUphbm5O48aNOXz4cIZ9tm3bRtu2bQ3fZBwcHPIitCJHitwVbps2baJ27dqMHDmSsLAwAEkIIluydaUQFRVFTExMtqe+x8TE4OzsbNh2dnYmNDQ0wz7Xrl0DYMyYMej1ejp37kzt2rUfOVZwcDDBwcEATJs2DRcXl2zF8oC5uflTv7ag0imFbuks9DFROE6aj2WVGqYOKdcVlff5+vXrDBw4kHXr1lG7dm02bNhAnTp1TB1Wnikq7/PDcqvNRiWFqKgovvzySy5evAjAt99+y4EDBwgJCeHDDz/MkUD0ej2RkZGMGzeOmJgYxo0bx8yZMylWLOM3WX9//wwLe0RFRT3V+VxcXJ76tQWRUgrLNUtIOX0c7Z1BxLqUhiLQ/qLwPqelpdGiRQsiIyMZPnw4o0eP5u7du4W+3Q8rCu/zfz1Lm7Mqf27U7aOvv/6aOnXqsGLFCszN0/PI888/z8mTJ40KwMnJiejoaMN2dHQ0Tk5Oj+zj4+ODubk5rq6ulC5dmsjISKOOL55MbVlH0rbf0ovcNfQ1dTgiB1y7dg29Xo9Op2PSpEls3bqVTz75RCqaimdiVFIICwujY8eOGWYv29rakpCQYNRJvL29iYyM5ObNm6SmprJv3z58fHwy7PPCCy9w+vRpAGJjY4mMjJRZljnkQZE7q6atpMhdIaDX6/nmm29o0aIFK1euBKBly5ay3oHIEUbdPnJwcOD69esZLjmuXr1q9P0snU5Hnz59mDJlCnq9Hj8/Pzw8PFizZg3e3t74+PhQq1YtTpw4wcCBAzEzM6NHjx7Y2dk9XauEwcNF7hw+Hk10XJypQxLPICwsjMDAQA4fPoyvr6+skSxynFFJoX379nz++ed07NgRvV7Pnj17WL9+/WPnGmSmbt26j5Ta7tq1q+HfmqbRu3dvevfubfQxRdYMRe6KO6QXubOyAkkKBdaqVasYPXo0NjY2zJkzhzfeeEPml4gcZ1RSaNmyJXZ2dgQHB+Ps7MyuXbvo2rUrL7zwQm7HJ56SSk76X5G7YZ9LkbtCwNPTE39/f6ZMmSLVBkSuMSop6PV66tevT/369XM7HpEDlF6PfumDInej0cqWM3VI4ikkJSXxxRdfADBixAiaNGlCkyZNTByVKOyM6mh+7733WLp0KWfPns3teEQOSC9ydwCt6ztoNX2e/AKR7xw+fJg2bdowf/58YmJiDOXrhchtRl0pjB49mr179/Lll19iZmZGkyZNaNq0Kc8991xuxyeyKWORu3amDkdkU3x8PNOmTWP58uWULVuWVatW0aKFrJMt8o5RScHLywsvLy969OjBmTNn2LNnDxMmTMDR0ZGZM2fmdozCSFLkruCLjIzkhx9+oE+fPgwbNuyRyZtC5LZsF8Rzd3enbNmyhIeHc/369dyISTwFKXJXcMXExPDrr7/Su3dvKlasyL59+2SOjjAZo5LCvXv3OHjwIHv27CE0NJTnn3+eDh06PDIBTZiGuheHft4kMDOTIncFiFKK33//nVGjRnHnzh2aNGlChQoVJCEIkzIqKXzwwQdUrlyZpk2bMnjwYLmkzUdUagr6r6ZCzE3MBk9GKykVMQuCGzduMGrUKDZt2sTzzz/
PqlWrZEayyBeMSgrz5s3D0VHGuec3SinUdwvh3Cm0dwahVahm6pCEEdLS0ujUqRPXr19n9OjRvPfee4aaYkKYWqa/iWfOnKFatfQPmYiICCIiIh67X40ahb/8cn6ltqxD7Q1Ga9dVitwVABEREZQuXRqdTseUKVPw8PDA29vb1GEJkUGmSWHZsmXMmjULgIULFz52H03TmD9/fu5EJrL0oMidVr8Z2qvdTR2OyEJaWhrLly9n6tSpjB49moCAAFknWeRbmSaFBwkBYMGCBXkSjDCOuhSOftlsKFcRLaC/DD3Nx0JDQxk8eDBHjx6lZcuWtG7d2tQhCZElo2Y0T58+/bGPyxyFvJde5G4SFLfH7ONRaJZWpg5JZOK7776jTZs2nD9/nrlz57Jy5UrKlClj6rCEyJJRvVsP1jkw9nGROwxF7hITMRsuRe7yOy8vL1588UUmTZpU5JaKFAVXlklhzZo1AKSmphr+/cCNGzekUmMekiJ3+V9iYiKzZ89G0zRGjhwpBexEgZRlUniwhKZer8+wnCakrw/apUuX3ItMZGAocvfme1LkLh86cOAAgYGBXLhwgZ49e6KUkr4eUSBlmRT69u0LQKVKlWSFJxOSInf5V1xcHJ999hkrV67E09OTNWvW0LRpU1OHJcRTyzQp3Lx5E1dXVwBq1qzJjRs3HrufTMnPXVLkLn+7ceMGa9eu5f3332fIkCHY2tqaOiQhnkmmSSEwMNCwKHj//v0zPcB/+xpEzpEid/lTTEwMGzduJCAggAoVKnDgwAHpXxOFRqZJ4UFCAPngNwUpcpf/KKXYuHEjY8aMITY2lmbNmuHt7S0JQRQqRs1T+K8bN25w8+bNnI5FPESt/Sa9yF2/kVLkLh+4fv06ffr0oW/fvpQtW5ZNmzZJiQpRKBk1T2HOnDm89NJLVK5cmR07drB06VLMzMx4++23admyZW7HWOSoOzGogzvRmreVInf5QFpaGq+//jrXr19nzJgxvPvuu1LAThRaRv1mnzp1io8//hiA3377jTFjxlCsWDFmzJghSSEXqB3/B/o0NP/2pg6lSLt69aqhgN1nn33Gc889h5eXl6nDEiJXGXX7KDU1FXNzc2JiYoiPj6dKlSp4eHhw9+7d3I6vyFHJyaidm6BWAzRXd1OHUySlpaWxePFiWrRoYehba9GihSQEUSQYdaVQrlw51q9fz61bt6hbty6QPgLDxsYmV4MritT+7XAvDrPWHUwdSpF09uxZAgMDOX78OP7+/rRt29bUIQmRp4y6Uvjwww+5fPky9+/fp2vXrgCcO3dOJunkMKXXo7ZtBM8KUFH6EvLaypUrefHFF7l06RILFixg+fLluLvL1ZooWoy6UnBzc2PAgAEZHmvYsCENGzbMlaCKrFNH4XoE2ruDZZJaHnpQkqJixYq0a9eOCRMm4OzsbOqwhDAJo4dQ7Nixg127dhETE4OTkxPNmzfHz88vN2MrcvR//AKOLmj1pIhaXkhMTGTGjBnodDpGjRpFo0aNaNSokanDEsKkjEoK69atY+fOnbRv3x4XFxeioqLYuHEjt2/fplOnTrkdY5GgLp+HsyfR3ghAk+GOuW7fvn0MGTKEixcv0rt3bylgJ8S/jPr02bZtG+PHj88wc7NWrVqMGzdOkkIOUcG/gJU1WrM2pg6lUIuNjWXy5Ml8//33lCtXjrVr10p5ayEeYlRHc3JyMvb29hkes7Oz4/79+7kSVFGj7kSjDu1Ga+KPZlvc1OEUajdv3mTdunV8+OGHBAcHS0IQ4j+MSgq1a9dm7ty5XLt2jfv37xMREcH8+fOpVauW0ScKCQlhwIABfPLJJ2zYsCHT/Q4cOECXLl0IDw83+tgFnWGyWiuZrJYboqOj+eabbwCoUKECBw8eZMyYMTKkWojHMOr2UZ8+ffjmm28IDAwkLS0Nc3NzGjVqxNtvv23USfR6PcuWLWP06NE4OzszYsQIfHx8KFu2bIb9EhMT2bRpExUrVsx+Swo
olZyE2rkZajdAcy1t6nAKFaUUq1ev5tNPPyU+Pp4WLVrg7e0tI4uEyMITk0JCQgLXr1/nnXfeoW/fvsTFxWFnZ4eZmfG19MLCwnBzczOsvdC4cWMOHz78SFJYs2YNHTp0YOPGjdlsRsH1v8lqHU0dSqESERHBiBEj2LZtG3Xq1GHWrFlSwE4II2SZFI4dO8YXX3zB/fv3sba2ZsiQIdSoUSPbJ4mJicnw7czZ2ZnQ0NAM+5w/f56oqCjq1q2bZVIIDg4mODgYgGnTpj31gujm5uYmX0xd6fVEb/8drUJVnBo2y/XRL/mhzXkhNTWVpk2bcuPGDWbPns2HH36IrgitRVFU3ueHSZtz8LhZPblmzRreeust/Pz82LZtG6tXr2by5Mk5HoRer2flypWG5T+z4u/vn2Fp0KioqKc654OhtaakThxGH3kF7b3AR9bAzg35oc256cqVK7i7uxsK2Hl6elKvXr1C3ebHKezv8+NIm7Mnq5n6Wd4DunHjBi+++CJWVla0bduW69evP1UATk5OGT70oqOjcXJyMmwnJSVx5coVJkyYQL9+/QgNDWX69OmFvrNZ/8cGcHJBq9vY1KEUaKmpqSxatAhfX19WrFgBQPPmzfH09DRxZEIUPFleKSilDP/W6XSkpaU91Um8vb2JjIzk5s2bODk5sW/fvgxLfNra2rJs2TLD9vjx4+nZs2ehvgesLofDP3+hvfG2TFZ7BmfOnCEwMJATJ07Qtm1bXn75ZVOHJESBluWnUXJyMuPGjTNsJyUlZdgGmDBhwhNPotPp6NOnD1OmTEGv1+Pn54eHhwdr1qzB29sbHx+fpwy/4FJ/bAQrG7RmrU0dSoG1fPlyxo0bh4ODAwsXLqR9+/YyK1mIZ5RlUvjwww8zbD9LraO6desaym4/8KDi6n+NHz/+qc9TEKjb0ajDu9B8X5bJak/hQUmKKlWq8OqrrzJhwoQMtyOFEE8vy6Tg6+ubR2EULWrH76DXy2S1bEpISODzzz/H3NycMWPGSKVeIXKB8ZMNRI4wTFar0xCtpJupwykwdu/eTatWrVi6dCn379/P0N8lhMg50sOZx9S+7ZAQLyurGenu3btMmjSJH374AS8vL9atW0eDBg1MHZYQhZYkhTyk9Pr0aqhelcC7qqnDKRBu3brFL7/8Qr9+/Rg4cKDUKxIil0lSyEsnD8PNSLT3e8gomSw8SATvvvuuoYCddCQLkTeMSgopKSn89NNP7N27l7i4OFasWMGJEyeIjIzkxRdfzO0YCw39H7+AU0mZrJYJpRTr1q1j7NixJCQk0LJlS8qXLy8JQYg8ZFRH84oVK7hy5Qr9+/c3fMP18PBg69atuRpcYaIuhcO5U2it2qEVoTo8xoqIiKBXr170798fb29vtm7dSvny5U0dlhBFjlFXCocOHWLu3LlYW1sbkoKTkxMxMTG5Glxhkr6ymg1aU1lZ7b9SU1N54403iIqKYtKkSfTu3btIFbATIj8xKimYm5uj1+szPBYbG4udnV2uBFXYpE9W243m9wqabTFTh5NvXLp0ibJly2Jubs706dMpV64cHh4epg5LiCLNqNtHDRs2ZP78+dy8eROA27dvs2zZMho3lnvjxlA7fgO9QmvZztSh5AupqaksWLAAPz8/li9fDkCzZs0kIQiRDxiVFLp3746rqyuDBw8mISGB/v374+joSOfOnXM7vgJPJSWmT1arK5PVAE6dOkW7du347LPPaNmyJe3aSaIUIj8x+vZRQEAAAQEBhttGMqTSOGr/dki4JyurAUFBQYwfPx5HR0e+/vprXnnlFVOHJIT4D6OSwo0bNzJsJyYmGv79YIlN8SilT0MFbwSvSmjeVUwdjsk8KGBXtWpVXnvtNcaNG4ejo6OpwxJCPIZRSeHhtQ/+a82aNTkWTKFjmKzW09SRmMS9e/cMBezGjh0rBeyEKACMSgr//eC/c+cOP/74I1WrSqmGrPxvslojU4eS53bu3MnQoUOJiIigT58
+hqsFIUT+9lRVUkuUKEFAQACrVq3K6XgKDXUpDM6dRmvVvkhNVrtz5w4DBw6ke/fuWFlZsW7dOiZOnCgJQYgC4qlrH127do3k5OScjKVQUX/8AtY2aE2L1spqUVFR/P7773z88ccMHDgQa2trU4ckhMgGo5LC2LFjM3zTS05O5sqVK7zxxhu5FlhBpmKiUEf2oPm1KxKT1W7evMmGDRt4//33qVChAgcOHJB6RUIUUEYlhZYtW2bYtra2xtPTk9KlS+dKUAVd+spqCq1V4R6Dr5Tixx9/ZMKECSQmJuLv7y8F7IQo4J6YFPR6PadOneKDDz7AwsIiL2Iq0FRSImrXZrS6jdBcCu9w3StXrjBs2DB27txJ/fr1mTlzphSwE6IQeGJSMDMz4+TJk9JRaCS1bxsk3EMrxCurpaam0rlzZ2JiYpgyZQq9evXCzExWdhWiMDDqL/mVV15h7dq1pKam5nY8BZphspp3lUI5We3ChQukpaVhbm7OrFmz2L59OwEBAZIQhChEsrxS2LNnD02bNmXz5s3cuXOH33//HXt7+wz7LFy4MFcDLFBOHIZb1zF7vbepI8lRKSkpLFy4kC+++ILRo0fzzjvv0KRJE1OHJYTIBVkmhSVLltC0aVM++eSTvIqnQNP/sQGcXaF24Zm1+9dffzF48GBOnz5Nu3btePXVV00dkhAiF2WZFJRSAFSrVi1PginI1IVQCD2D1uWdQjNZbdmyZUyYMAFnZ2eWLl3KSy+9ZOqQhBC5LMuk8GDkUVZq1KiRowEVVCq48ExWe1CSokaNGrzxxhuMHTuWEiVKmDosIUQeyDIppKSksGjRIsMVw39pmsb8+fNzJbCCRMXcSp+s1qo9mo2tqcN5avHx8UydOhVLS0vGjRtHgwYNaNCgganDEkLkoSyTgrW1tXzoG0Ft/x0UaK3amzqUp7Zjxw6GDRvGtWvXePfdd6WAnRBF1FPXPhLp0ierbUGr1xjN2dXU4WRbTEwMEyZM4KeffqJixYps2LABHx8fU4clhDARozqaRebU3m2QWHAnq92+fZvNmzfz6aef0r9/f6ysrEwdkhDChLJMCitXrsyxE4WEhBAUFIRer6dVq1Z07Ngxw/O//fYb27ZtQ6fTYW9vz0cffUTJkiVz7Py5QenTUNv+naxWvrKpwzHajRs3WL9+PR988AHe3t4cPHhQOpKFEMBTrqeQXXq9nmXLljFy5Ei++OIL9u7dy9WrVzPsU65cOaZNm8bMmTNp2LAh3333XV6E9mxCDqVPVisg6y8rpVi9ejW+vr7MmDGDCxcuAEhCEEIY5ElSCAsLw83NjVKlSmFubk7jxo05fPhwhn1q1KhhuHVRsWJFYmJi8iK0Z6L/4xdwKQV18v8IncuXL/Pyyy8zePBgqlWrxtatW6WAnRDiEXnS0RwTE4Ozs7Nh29nZmdDQ0Ez33759O7Vr137sc8HBwQQHBwMwbdo0XFxcniomc3Pzp34tQMq5M8SEncGuzwBsXfN3NdTU1FQaN25MTEwM8+bN49133y0y9Yqe9X0uiKTNRUNutTnfjT7atWsX58+fZ/z48Y993t/fH39/f8N2VFTUU53HxcXlqV8LoP9pBdjYcq9OIxKe4Ti56fz583h6eqLT6Zg5cyZ16tTBxsamQFyF5ZRnfZ8LImlz0fAsbXZ3d8/0uTz5uujk5ER0dLRhOzo6+rELsZw8eZL169czdOjQfL12g4q+hTq6F61ZGzTr/DdZLSUlhTlz5tCqVSuCgoIAaNy4MR4eHiaOTAiR3+VJUvD29iYyMpKbN2+SmprKvn37HhkLf+HCBZYsWcLQoUNxcHDIi7Cemtr+GwBay/y3stqJEyd46aWXmDFjBi+99NIjo7yEECIreXL7SKfT0adPH6ZMmYJer8fPzw8PDw/WrFmDt7c3Pj4+fPfddyQlJTF79mwg/dJo2LBheRFetqikBNTurWj1muS7yWpLly5lwoQJuLq6EhQURJs2bUwdkhCigMm
zPoW6detSt27dDI917drV8O8xY8bkVSjPJD9OVntQkqJWrVp069aNUaNG5furLSFE/pTvOprzM8PKahWqonlVMnU4xMXFMWXKFKysrJgwYQL169enfv36pg5LCFGAFY1xiTkl5CBE3cAsH1wlbNu2DT8/P77//nvMzc2lJIkQIkfIlUI2GCar1TbdZLWYmBjGjRvHunXrqFy5Ml9//fUjt+WEEOJpyZWCkdT5fyDsbzT/V9HMTLey2p07d/jjjz8YNGgQmzdvloQghMhRcqVgJBW8EWyKoTVplefnjoyMZP369Xz00UeUL1+egwcPSkeyECJXyJWCEQyT1Zrn7WQ1pRTff/89fn5+zJo1i4sXLwJIQhBC5BpJCkZQO/8vfWU1v7ybrHbx4kW6dOnC0KFDqVGjBsHBwXh5eeXZ+YUQRZPcPnoClZKC2hMMteqjOefN+g6pqal07dqVO3fu8Pnnn9O9e/ciU8BOCGFakhSeQB3bB3F3MWvxUq6fKywsjHLlymFubs6cOXPw9PTMsnCVEELkNPn6+QRq5yYo6QbVaufaOe7fv8/s2bPx9/dn+fLlADRq1EgSghAiz8mVQhZUxCUIPYP2RgBaLt2+OX78OIGBgZw9e5bXXnuNTp065cp5hBDCGJIUsqB2bgJzC7TG/k/e+SksWbKEiRMn4urqyvLly2ndunWunEcIIYwlt48yoZISUft3oPk0QbOzz9lj/1uSonbt2nTv3p0dO3ZIQhBC5AtypZAJdeBPSEpEy8EO5tjYWCZPnoy1tTUTJ06UAnZCiHxHrhQeQ+n16TOYPSuAd5UcOebWrVvx8/Pjhx9+wMrKSgrYCSHyJblSeJy/jsKNCLR3B6Np2jMdKjo6mrFjx7JhwwaqVq3KsmXLqF27ds7EKYQQOUySwmPo/9gATi5o9Zo887FiY2PZvn07gYGB9OvXD0tLy2cPUAghcokkhf9Ql8Lhn7/Q3ngbzfzpfjwRERGsW7eOjz/+GC8vLw4ePIi9fc52VgshRG6QPoX/UH9sACsbtGbZX99Yr9ezcuVKWrZsyZdffmkoYCcJQQhRUEhSeIiKiUId2YPWrDWabbFsvfb8+fN06dKFESNGULt2bbZt2yYF7IQQBY7cPnqI2v4b6BVaq/bZel1qairdunUjNjaWWbNm0bVr12fuoBZCCFOQpPAvlZSI2rUFrW4jNJdSRr0mNDQULy8vzM3NmTt3Lp6enri5ueVypEIIkXvk9tG/1N5gSLyH1qbjE/dNTk5m5syZ+Pv7ExQUBECDBg0kIQghCjy5UgCUPi19spp3FbTylbPc9+jRowQGBnLu3Dlef/11Xn/99TyKUgghcp9cKQCEHISoG5i17pjlbosWLaJDhw7Ex8fz7bffMnfuXJycnPImRiGEyAOSFAD91g3gUgrqNHj883o9APXq1aNnz55s376dli1b5mGEQgiRN4r87SMVfhbCz6K9+R6amS7Dc3fv3mXixInY2NgwefJkKWAnhCj0ivyVgvrjF7AphtYk45oJmzdvxs/Pjx9//JFixYpJATshRJFQpK8UVNQN1LH9aG06olnbABAVFcWoUaP47bffqF69OitWrKBmzZomjlQIIfJG0U4K234FMw2tZTvDY3FxcezevZthw4bx0UcfYWFhYcIIhRAibxXZpKC/F4/a/QeaT1OuJSbz05df0r9/f7y8vDh06BDFixc3dYhCCJHn8iwphISEEBQUhF6vp1WrVnTs2DHD8ykpKcyfP5/z589jZ2fHp59+iqura67Fk/jHRkhO5LckMwL9/NDr9bz66qt4eXlJQhBCFFl50tGs1+tZtmwZI0eO5IsvvmDv3r1cvXo1wz7bt2+nWLFizJs3j1deeYXvv/8+1+JRqanEbljFqRSNj6bOpF69euzYsUMK2Akhirw8SQphYWG4ublRqlQpzM3Nady4MYcPH86wz5EjR/D19QWgYcOGnDp1KtdG/KQd3o3Z3RgWhUYwe/ZsVq1ahYe
HR66cSwghCpI8uX0UExODs7OzYdvZ2ZnQ0NBM99HpdNja2hIXF/fIWgTBwcEEBwcDMG3aNFxcXLIdT7KbOxEVazB3/o+ULlMm268vqMzNzZ/q51WQSZuLBmlzDh43x4+Yy/z9/fH3/9+cgqioqOwfxKsy5ad/TVRU1NO9voBycXEpUu0FaXNRIW3OHnd390yfy5PbR05OTkRHRxu2o6OjH6kZ9PA+aWlpJCQkYGdnlxfhCSGE+FeeJAVvb28iIyO5efMmqamp7Nu3Dx8fnwz71KtXjz///BOAAwcOUL16dVmoRggh8lie3D7S6XT06dOHKVOmoNfr8fPzw8PDgzVr1uDt7Y2Pjw8tW7Zk/vz5fPLJJxQvXpxPP/00L0ITQgjxEE0V8KI+165de6rXyT3IokHaXDRIm7PH5H0KQgghCgZJCkIIIQwkKQghhDCQpCCEEMKgwHc0CyGEyDlF9kph+PDhpg4hz0mbiwZpc9GQW20usklBCCHEoyQpCCGEMCiySeHhonpFhbS5aJA2Fw251WbpaBZCCGFQZK8UhBBCPEqSghBCCIMCt8hOdoWEhBAUFIRer6dVq1Z07Ngxw/MpKSnMnz+f8+fPY2dnx6effoqrq6tpgs0hT2rzb7/9xrZt29DpdNjb2/PRRx9RsmRJ0wSbQ57U5gcOHDjA7NmzmTp1Kt7e3nkbZA4zps379u3jxx9/RNM0PD09GTBgQN4HmoOe1OaoqCgWLFjAvXv30Ov1dO/enbp165om2Bzw1VdfcezYMRwcHJg1a9YjzyulCAoK4vjx41hZWdG3b1/Kly//bCdVhVhaWpr6+OOP1fXr11VKSooKDAxUV65cybDP5s2b1eLFi5VSSu3Zs0fNnj3bFKHmGGPa/Ndff6mkpCSllFJbtmwpEm1WSqmEhAQ1duxYNXLkSBUWFmaCSHOOMW2+du2aGjJkiIqLi1NKKXXnzh1ThJpjjGnzokWL1JYtW5RSSl25ckX17dvXFKHmmNOnT6vw8HA1aNCgxz5/9OhRNWXKFKXX69U///yjRowY8cznLNS3j8LCwnBzc6NUqVKYm5vTuHFjDh8+nGGfI0eO4OvrC0DDhg05deoUqgD3vRvT5ho1amBlZQVAxYoViYmJMUWoOcaYNgOsWbOGDh06YGFhYYIoc5Yxbd62bRtt27alePHiADg4OJgi1BxjTJs1TSMhIQGAhIQEHB0dTRFqjqlWrZrh/XucI0eO0Lx5czRNo1KlSty7d4/bt28/0zkLdVKIiYnB2dnZsO3s7PzIB+DD++h0OmxtbYmLi8vTOHOSMW1+2Pbt26ldu3YeRJZ7jGnz+fPniYqKKtC3Eh5mTJuvXbtGZGQkY8aMYdSoUYSEhORxlDnLmDZ37tyZ3bt38+GHHzJ16lT69OmT12HmqZiYGFxcXAzbT/p7N0ahTgoia7t27eL8+fO8+uqrpg4lV+n1elauXEmvXr1MHUqe0uv1REZGMm7cOAYMGMDixYu5d++eqcPKVXv37sXX15dFixYxYsQI5s2bh16vN3VYBUqhTgpOTk5ER0cbtqOjo3Fycsp0n7S0NBISErCzs8vTOHOSMW0GOHnyJOvXr2fo0KEF/nbKk9qclJTElStXmDBhAv369SM0NJTp06cTHh5uinBzhLG/2z4+Ppibm+Pq6krp0qWJjIzM61BzjDFt3r59O40aNQKgUqVKpKSkFOgr/ydxcnLKsPpaZn/v2VGok4K3tzeRkZHcvHmT1NRU9u3bh4+PT4Z96tWrx59//gmkj0ypXr06mqaZINqcYUybL1y4wJIlSxg6dGiBv88MT26zra0ty5YtY8GCBSxYsICKFSsydOjQAj36yJj3+YUXXuD06dMAxMbGEhkZSalSpUwRbo4wps0uLi6cOnUKgKtXr5KSkoK9vb0pws0TPj4+7Nq1C6UU586dw9bW9pn7UQr9jOZjx46xYsUK9Ho9fn5+dOrUiTVr1uDt7Y2Pjw/3799n/vz5XLhwgeL
Fi/Ppp58W6D8ceHKbJ02axOXLlylRogSQ/oc0bNgw0wb9jJ7U5oeNHz+enj17FuikAE9us1KKlStXEhISgpmZGZ06daJJkyamDvuZPKnNV69eZfHixSQlJQHQo0cPatWqZeKon96cOXM4c+YMcXFxODg40KVLF1JTUwFo06YNSimWLVvGiRMnsLS0pG/fvs/8e13ok4IQQgjjFerbR0IIIbJHkoIQQggDSQpCCCEMJCkIIYQwkKQghBDCQJKCKFDGjx/Ptm3bTB1Glnbv3s3kyZMzff7vv/8u8NVKReElQ1KFyfTr1487d+5gZva/7yZffvllljMyx48fT7NmzWjVqlWOxTF+/HhCQ0MxMzPD0tKSqlWr8s477+RYMbUuXbowd+5c3NzccuR4mVm7di3r16/H3NwcnU5H2bJl6dWrF5UqVcpXcYr8rdCvpyDyt2HDhvH888+bOgz69OlDq1atiI+PZ9asWaxYsYJPP/3U1GFlW6NGjejfvz9paWmsXbuW2bNns2jRIlOHJQoQSQoiX4mPj2f+/PmEhoai1+upXLky7733XobqmA9cv36dhQsXcvHiRczNzalRowYDBw4EICIigm+++Ybz589jb29P165dady48RPPX7x4cRo0aMAff/wBwD///MPy5cu5du0a7u7uBAQEULlyZQD+/PNPfvrpJ2JjY7Gzs+PNN9+kWbNm/Pnnn2zbto1JkyYxbtw4AIYMGQLARx99hIODA/PmzWPRokVs2LCB8PBwBg8ebIghKCgIpRR9+vQhISGBFStWcPz4cTRNw8/Pjy5dumS4unocnU5Hs2bNWL9+PbGxsdjb2xMWFkZQUBARERFYWlrSoEEDevfujbm5+WPjbNy4MUePHmX16tXcunWLsmXL8t577+Hp6fnEn6MouCQpiHxFKYWvry8DBw5Er9ezcOFCli1bxtChQx/Zd/Xq1dSqVYtx48aRmprK+fPngfQCeJMnT6ZLly6MHDmSy5cvM3nyZJ577jnKli2b5fljY2M5ePAg5cqVIz4+nmnTpvH222/TpEkT9u/fz7Rp05g7dy4WFhYEBQUxdepU3N3duX37NvHx8Y8cb8KECXTp0oUZM2YYbss8qEcE0KRJE3766ScSExOxsbFBr9ezf/9+AgMDAViwYAEODg7MnTuX5ORkpk2bhrOzM61bt86yHampqezcuRM7OzuKFSsGgJmZGb1798bb25vo6GimTp3Kli1beOWVVx4b54ULF1i4cCHDhg3D29ubXbt2MX36dObMmVPgiyiKzElHszCpGTNmEBAQQEBAANOnT8fOzo6GDRtiZWWFjY0NnTp14u+//37sa83Nzbl16xa3b9/G0tKSKlWqAOn1cUqWLImfnx86nQ4vLy8aNGjA/v37M40jKCiIgIAAhgwZgqOjI7179+bYsWO4ubnRvHlzdDodTZs2xd3dnaNHjwLpC7pcvnyZ+/fv4+joiIeHR7bbX7JkSby8vDh06BAAp06dwsrKikqVKnHnzh2OHz9OQEAA1tbWODg48Morr7Bv375Mj7d//34CAgJ466232LZtG4MGDUKn0wFQvnx5KlWqhE6nw9XVFX9/f86cOZPpsYKDg/H396dixYqYmZnh6+uLubk5oaGh2W6nKDjkSkGY1JAhQzL0KSQnJ7NixQpCQkIMtf8TExPR6/WP3DLp0aMHq1evZuTIkRQrVox27drRsmVLbt26RWhoKAEBAYZ909LSaN68eaZxvP322490XsfExDyydnXJkiWJiYnB2tqaTz/9lF9//ZVFixZRuXJlevXqRZkyZbL9M2jatCl79+6lRYsW7Nmzx1C0LioqirS0NN5//33Dvkqpx95Ke+BBn0JsbCyzZs3i/PnzVK9eHUhfdGflypWEh4dz//590tLSslzPNyoqip07d7J582bDY6mpqQV+pT6RNUkKIl/59ddfuXbtGp999hklSpTg4sWLDB069LFLpJYoUYIPP/wQgLNnzzJp0iSqVau
Gs7Mz1apVY8yYMc8Ui5OTEwcPHszwWFRUlGGlutq1a1O7dm3u37/P6tWrWbx4MRMnTsz2eRo1asTKlSuJjo7m0KFDhuGszs7OmJubs2zZMsO3fWPZ29vzwQcfMHz4cJo2bYqjoyNLly6lXLlyDBgwABsbG37//XcOHDiQ6TGcnZ3p1KkTnTp1ynabRMElt49EvpKUlISlpSW2trbEx8fz448/Zrrv/v37DYuuPLhvrmka9erVIzIykl27dpGamkpqaiphYWFcvXo1W7HUqVOHyMhI9uzZQ1paGvv27ePq1avUrVuXO3fucPjwYZKSkjA3N8fa2jrTdTgcHBy4ceNGpuext7enevXqfPXVV7i6uhr6PRwdHalVqxYrV64kISEBvV7P9evXs7zl8zB3d3dq1arFL7/8AqRfcdna2mJtbU1ERARbt27NMs5WrVrxxx9/EBoailKKpKQkjh07RmJiolHnFwWTXCmIfOXll19m7ty5vPPOOzg5OdGuXbtHFmd/IDw8nOXLl5OQkECJEiV4++23DWthjB49mhUrVrBixQqUUnh6etK7d+9sxWJnZ8fw4cMJCgpiyZIluLm5MXz4cOzt7bl9+za//fYb8+fPR9M0ypUrx3vvvffY43Tu3JkFCxZw//593n///ccubNS0aVPmz59Pjx49Mjz+8ccf8/333zNo0CASExMpVaoUHTp0MLoNr776KhMnTuS1116jZ8+efP311/zyyy94eXnRuHFjw4I0j4uzcePGfPDBB3zzzTdERkYa+m2qVq1q9PlFwSOT14QQQhjI7SMhhBAGkhSEEEIYSFIQQghhIElBCCGEgSQFIYQQBpIUhBBCGEhSEEIIYSBJQQghhMH/Ay+QoBa2XqhGAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Data splitting\n", "from sklearn.model_selection import train_test_split\n", "\n", "# Modeling\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", "# Hyper-parameter tuning\n", "from sklearn.model_selection import RandomizedSearchCV\n", "\n", "# Evaluation\n", "from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score\n", "\n", "# Integer distributions for the randomized search\n", "from scipy.stats import randint\n", "\n", "# One seed shared by the split, the tree and the search, so that\n", "# Restart & Run All gives the same numbers on every run.\n", "SEED = 42\n", "\n", "# Load the data and make a stratified 60/40 train/test split\n", "diabetes = pd.read_csv(\"data/diabetes.csv\")\n", "X = diabetes.drop(\"diabetes\", axis = 1)\n", "y = diabetes[\"diabetes\"]\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X,\n", "    y,\n", "    test_size = 0.4,\n", "    random_state = SEED,\n", "    stratify = y\n", ")\n", "\n", "# Model\n", "tree = DecisionTreeClassifier(random_state = SEED)\n", "\n", "# Hyper-parameter tuning: RandomizedSearchCV samples candidates from param_dist.\n", "# randint(1, 9) draws integers from 1 to 8 (the upper bound is exclusive).\n", "param_dist = {\"max_depth\": [3, None],\n", "              \"max_features\": randint(1, 9),\n", "              \"min_samples_leaf\": randint(1, 9),\n", "              \"criterion\": [\"gini\", \"entropy\"]}\n", "tree_randomcv = RandomizedSearchCV(tree, param_dist, cv=5, random_state = SEED)\n", "tree_randomcv.fit(X_train, y_train)\n", "print(\"Tuned Decision Tree Parameters: {}\".format(tree_randomcv.best_params_))\n", "print(\"Best score is {}\".format(tree_randomcv.best_score_))\n", "\n", "# Predict on the test set: hard labels, and the probability of the second class\n", "y_pred = tree_randomcv.predict(X_test)\n", "y_pred_prob = tree_randomcv.predict_proba(X_test)[:,1]\n", "\n", "# Evaluate\n", "print(\"AUC: {}\".format(roc_auc_score(y_test, y_pred_prob)))\n", "print(confusion_matrix(y_test, y_pred))\n", "print(classification_report(y_test, y_pred))\n", "\n", "# ROC curve; the dashed diagonal is the chance line\n", "fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n", "plt.plot([0,1],[0, 1], \"k--\")\n", "plt.plot(fpr, tpr, label = \"Decision Tree\")\n", "plt.xlabel(\"False Positive Rate\")\n", "plt.ylabel(\"True 
Positive Rate\")\n", "plt.title(\"Decision Tree ROC Curve\")\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### SVC (binary)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tuned Model Parameters: {'SVM__C': 10, 'SVM__gamma': 0.01}\n", "Accuracy: 0.9541984732824428\n", " precision recall f1-score support\n", "\n", " democrat 0.99 0.94 0.96 83\n", " republican 0.90 0.98 0.94 48\n", "\n", " accuracy 0.95 131\n", " macro avg 0.95 0.96 0.95 131\n", "weighted avg 0.96 0.95 0.95 131\n", "\n" ] } ], "source": [ "# Import necessary modules\n", "# (StandardScaler and GridSearchCV are used below, so they are imported here\n", "# instead of relying on another cell having imported them.)\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.svm import SVC\n", "\n", "# Load the data\n", "vote_raw = pd.read_csv(\"data/house-votes-84.csv\")\n", "vote = vote_raw.copy()\n", "col_names = ['party', 'infants', 'water', 'budget', 'physician', 'salvador',\n", "             'religious', 'satellite', 'aid', 'missile', 'immigration', 'synfuels',\n", "             'education', 'superfund', 'crime', 'duty_free_exports', 'eaa_rsa']\n", "vote.columns = col_names\n", "vote[vote == \"?\"] = np.nan # turn \"?\" 
into NaN\n", "# Encode the votes as 1 (\"y\") / 0 (\"n\"); 'party' is the target and is left as is\n", "for col in col_names[1:]:\n", "    vote[col] = vote[col].replace({\"y\": 1, \"n\": 0})\n", "X = vote.drop(\"party\", axis = 1)\n", "y = vote[\"party\"]\n", "\n", "# Split the data\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)\n", "\n", "# Pipeline: preprocessing & model\n", "steps = [('imputation', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),\n", "         (\"scaler\", StandardScaler()),\n", "         ('SVM', SVC())]\n", "\n", "# Create the pipeline: pipeline\n", "pipeline = Pipeline(steps)\n", "\n", "\n", "# Hyper-parameter tuning (keys are <step name>__<parameter>)\n", "parameters = {'SVM__C':[1, 10, 100],\n", "              'SVM__gamma':[0.1, 0.01]}\n", "cv = GridSearchCV(pipeline, parameters, cv = 3)\n", "cv.fit(X_train, y_train)\n", "print(\"Tuned Model Parameters: {}\".format(cv.best_params_))\n", "\n", "# Predict\n", "y_pred = cv.predict(X_test)\n", "\n", "# Performance\n", "print(\"Accuracy: {}\".format(cv.score(X_test, y_test)))\n", "print(classification_report(y_test, y_pred))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gapminder Data" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
populationfertilityHIVCO2BMI_maleGDPBMI_femalelifechild_mortalityRegion
034811059.02.730.13.32894524.5962012314.0129.904975.329.5Middle East & North Africa
119842251.06.432.01.47435322.250837103.0130.124758.3192.0Sub-Saharan Africa
240381860.02.240.54.78517027.5017014646.0118.891575.515.4America
32975029.01.400.11.80410625.355427383.0132.810872.520.0Europe & Central Asia
421370348.01.960.118.01631327.5637341312.0117.375581.55.2East Asia & Pacific
\n", "
" ], "text/plain": [ " population fertility HIV CO2 BMI_male GDP BMI_female life \\\n", "0 34811059.0 2.73 0.1 3.328945 24.59620 12314.0 129.9049 75.3 \n", "1 19842251.0 6.43 2.0 1.474353 22.25083 7103.0 130.1247 58.3 \n", "2 40381860.0 2.24 0.5 4.785170 27.50170 14646.0 118.8915 75.5 \n", "3 2975029.0 1.40 0.1 1.804106 25.35542 7383.0 132.8108 72.5 \n", "4 21370348.0 1.96 0.1 18.016313 27.56373 41312.0 117.3755 81.5 \n", "\n", " child_mortality Region \n", "0 29.5 Middle East & North Africa \n", "1 192.0 Sub-Saharan Africa \n", "2 15.4 America \n", "3 20.0 Europe & Central Asia \n", "4 5.2 East Asia & Pacific " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "gapminder = pd.read_csv(\"data/gm_2008_region.csv\")\n", "gapminder.head()" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 139 entries, 0 to 138\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 population 139 non-null float64\n", " 1 fertility 139 non-null float64\n", " 2 HIV 139 non-null float64\n", " 3 CO2 139 non-null float64\n", " 4 BMI_male 139 non-null float64\n", " 5 GDP 139 non-null float64\n", " 6 BMI_female 139 non-null float64\n", " 7 life 139 non-null float64\n", " 8 child_mortality 139 non-null float64\n", " 9 Region 139 non-null object \n", "dtypes: float64(9), object(1)\n", "memory usage: 11.0+ KB\n" ] } ], "source": [ "gapminder.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Linear Regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### train/test" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "R^2: 0.8219419939587727\n", "Root Mean Squared Error: 3.405248115733344\n" ] } ], "source": [ "from 
sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import mean_squared_error\n", "\n", "# Preprocess: one-hot encode, dropping the first level of each category\n", "gapminder_dummy = pd.get_dummies(gapminder, drop_first = True)\n", "\n", "# Split the data\n", "X = gapminder_dummy.drop(\"life\", axis = 1)\n", "y = gapminder_dummy[\"life\"]\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)\n", "\n", "# Fit the model\n", "reg_all = LinearRegression()\n", "reg_all.fit(X_train, y_train)\n", "\n", "# Predict on the test data: y_pred\n", "y_pred = reg_all.predict(X_test)\n", "\n", "# Compute and print R^2 and RMSE\n", "print(\"R^2: {}\".format(reg_all.score(X_test, y_test)))\n", "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n", "print(\"Root Mean Squared Error: {}\".format(rmse))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### CV" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.8196741 0.80301541 0.89758712 0.80425614 0.94015848]\n", "Average 5-Fold CV Score: 0.8529382494240787\n" ] } ], "source": [ "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import cross_val_score\n", "\n", "# No train/test split here: cross_val_score does the splitting on the full X, y\n", "\n", "# 5-fold cross-validation of a plain linear regression\n", "reg = LinearRegression()\n", "cv_scores = cross_val_score(reg, X, y, cv = 5)\n", "\n", "print(cv_scores) # one score per fold\n", "\n", "print(f\"Average 5-Fold CV Score: {np.mean(cv_scores)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Lasso Regression" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "R^2: 0.8401265152705228\n", "Root Mean Squared Error: 3.2266824659707334\n" ] } ], "source": [ "# Import Lasso\n", "from sklearn.linear_model import Lasso\n", "from 
sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import make_pipeline\n", "\n", "# Pipeline: standardize the features, then fit the Lasso\n", "lasso_pipe = make_pipeline(\n", "    StandardScaler(),\n", "    Lasso(alpha = 0.4)\n", ")\n", "\n", "# Fit the regressor to the data\n", "lasso_pipe.fit(X_train, y_train)\n", "\n", "y_pred = lasso_pipe.predict(X_test)\n", "\n", "# Compute and print R^2 and RMSE\n", "print(\"R^2: {}\".format(lasso_pipe.score(X_test, y_test)))\n", "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n", "print(\"Root Mean Squared Error: {}\".format(rmse))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 看一下哪些變數被 shrinkage 到 0 ,哪些變數最重要:" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[-0. -0.08591553 -2.91968634 -0. 0.58693244 1.6922106\n", " -1.11083667 -4.3362549 -0.48746711 0. 0. 0.\n", " -0.1705074 ]\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Compute and print the coefficients\n", "lasso_coef = lasso_pipe.named_steps[\"lasso\"].coef_\n", "print(lasso_coef)\n", "\n", "# Plot the coefficients, one tick per feature\n", "feature_pos = range(len(X_train.columns))\n", "plt.plot(feature_pos, lasso_coef)\n", "plt.xticks(feature_pos, X_train.columns.values, rotation=60)\n", "plt.ylabel(\"Coefficient\")\n", "plt.title(\"Lasso coefficients\")\n", "plt.margins(0.02)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ridge Regression" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "def display_plot(cv_scores, cv_scores_std, alphas=None, n_folds=10):\n", "    \"\"\"Plot the mean CV score against alpha with a +/- one standard error band.\n", "\n", "    cv_scores: mean CV score for each alpha.\n", "    cv_scores_std: standard deviation of the fold scores for each alpha.\n", "    alphas: alpha values for the x axis; when omitted, the notebook-level\n", "        `alpha_space` is used, as before.\n", "    n_folds: number of CV folds behind each standard deviation; the standard\n", "        error is std / sqrt(n_folds).\n", "    \"\"\"\n", "    if alphas is None:\n", "        alphas = alpha_space\n", "\n", "    # Convert explicitly so plain Python lists can be added and subtracted below\n", "    cv_scores = np.asarray(cv_scores, dtype=float)\n", "    std_error = np.asarray(cv_scores_std, dtype=float) / np.sqrt(n_folds)\n", "\n", "    fig, ax = plt.subplots()\n", "    ax.plot(alphas, cv_scores)\n", "    ax.fill_between(alphas, cv_scores + std_error, cv_scores - std_error, alpha=0.2)\n", "    ax.set_ylabel('CV Score +/- Std Error')\n", "    ax.set_xlabel('Alpha')\n", "    # Dashed horizontal line at the best mean score\n", "    ax.axhline(np.max(cv_scores), linestyle='--', color='.5')\n", "    ax.set_xlim([alphas[0], alphas[-1]])\n", "    ax.set_xscale('log')\n", "    plt.show()" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import necessary modules\n", "from sklearn.linear_model import Ridge\n", "from sklearn.model_selection import cross_val_score\n", "\n", "# Number of CV folds; also passed to display_plot for the standard error\n", "N_FOLDS = 10\n", "\n", "# Setup the array of alphas and lists to store scores\n", "alpha_space = np.logspace(-4, 0, 50)\n", "ridge_scores = []\n", "ridge_scores_std = []\n", "\n", "# Compute scores over range of alphas\n", "for alpha in alpha_space:\n", "\n", "    # Standardize, then fit a ridge regression with this alpha\n", "    ridge = make_pipeline(\n", "        StandardScaler(),\n", "        Ridge(alpha = alpha)\n", "    )\n", "\n", "    # Perform N_FOLDS-fold CV: ridge_cv_scores\n", "    ridge_cv_scores = cross_val_score(ridge, X, y, cv=N_FOLDS)\n", "\n", "    # Append the mean of ridge_cv_scores to ridge_scores\n", "    ridge_scores.append(np.mean(ridge_cv_scores))\n", "\n", "    # Append the std of ridge_cv_scores to ridge_scores_std\n", "    ridge_scores_std.append(np.std(ridge_cv_scores))\n", "\n", "# Display the plot\n", "display_plot(ridge_scores, ridge_scores_std, alphas=alpha_space, n_folds=N_FOLDS)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Elastic net" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Volumes/GoogleDrive/我的雲端硬碟/0. 
codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_coordinate_descent.py:647: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.410e+02, tolerance: 5.893e-01 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.\n", " model = cd_fast.enet_coordinate_descent(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_coordinate_descent.py:647: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 7.983e+02, tolerance: 5.890e-01 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.\n", " model = cd_fast.enet_coordinate_descent(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_coordinate_descent.py:647: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.506e+02, tolerance: 5.814e-01 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.\n", " model = cd_fast.enet_coordinate_descent(\n", "/Volumes/GoogleDrive/我的雲端硬碟/0. codepool_python/python_ds/python_ds_env/lib/python3.8/site-packages/sklearn/linear_model/_coordinate_descent.py:647: ConvergenceWarning: Objective did not converge. 
You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.375e+02, tolerance: 5.802e-01 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.\n", " model = cd_fast.enet_coordinate_descent(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Tuned ElasticNet l1 ratio: {'elasticnet__l1_ratio': 1.0}\n", "Tuned ElasticNet R squared: 0.8862016549771035\n", "Tuned ElasticNet MSE: 8.594868215979249\n" ] } ], "source": [ "# Import necessary modules\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.linear_model import ElasticNet\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.model_selection import GridSearchCV, train_test_split\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "# Load the data, rebuild the dummy-encoded frame from it, and split.\n", "# (Encoding here means the freshly read file is what actually gets modeled,\n", "# rather than a gapminder_dummy left over from an earlier cell.)\n", "gapminder = pd.read_csv(\"data/gm_2008_region.csv\")\n", "gapminder_dummy = pd.get_dummies(gapminder, drop_first = True)\n", "X = gapminder_dummy.drop(\"life\", axis = 1)\n", "y = gapminder_dummy[\"life\"]\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 42)\n", "\n", "# Pipeline: preprocessing & model\n", "steps = [('imputation', SimpleImputer(missing_values=np.nan, strategy='mean')),\n", "         (\"scaler\", StandardScaler()),\n", "         (\"elasticnet\", ElasticNet())]\n", "pipeline = Pipeline(steps)\n", "\n", "# Hyper-parameter tuning\n", "# l1_ratio mixes the two penalties: l1_ratio * L1 + (1 - l1_ratio) * L2.\n", "# The first grid point (l1_ratio = 0, a pure L2 penalty) is dropped: it made the\n", "# coordinate-descent solver emit ConvergenceWarnings that point to Ridge for that case.\n", "parameters = {\"elasticnet__l1_ratio\": np.linspace(0, 1, 30)[1:]}\n", "gm_cv = GridSearchCV(pipeline, parameters, cv = 5)\n", "gm_cv.fit(X_train, y_train)\n", "print(\"Tuned ElasticNet l1 ratio: {}\".format(gm_cv.best_params_))\n", "\n", "# Predict\n", "y_pred = gm_cv.predict(X_test)\n", "\n", "# Performance\n", "r2 = gm_cv.score(X_test, y_test)\n", "mse = mean_squared_error(y_test, y_pred)\n", "print(\"Tuned ElasticNet R squared: {}\".format(r2))\n", "print(\"Tuned ElasticNet MSE: {}\".format(mse))" ] } ], "metadata": { "kernelspec": { 
"display_name": "python_ds_env", "language": "python", "name": "python_ds_env" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 4 }