{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import Dependencies\n", "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", "from sklearn.datasets import load_boston\n", "boston = load_boston()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 독립 변수와 종속 변수를 분리한다." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "X_data = pd.DataFrame(boston.data, columns=boston.feature_names)\n", "y_data = pd.DataFrame(boston.target, columns=[\"Target\"])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CRIM</th>\n", " <th>ZN</th>\n", " <th>INDUS</th>\n", " <th>CHAS</th>\n", " <th>NOX</th>\n", " <th>RM</th>\n", " <th>AGE</th>\n", " <th>DIS</th>\n", " <th>RAD</th>\n", " <th>TAX</th>\n", " <th>PTRATIO</th>\n", " <th>B</th>\n", " <th>LSTAT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.00632</td>\n", " <td>18.0</td>\n", " <td>2.31</td>\n", " <td>0.0</td>\n", " <td>0.538</td>\n", " <td>6.575</td>\n", " <td>65.2</td>\n", " <td>4.0900</td>\n", " <td>1.0</td>\n", " <td>296.0</td>\n", " <td>15.3</td>\n", " <td>396.90</td>\n", " <td>4.98</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.02731</td>\n", " <td>0.0</td>\n", " <td>7.07</td>\n", " <td>0.0</td>\n", " <td>0.469</td>\n", " <td>6.421</td>\n", " <td>78.9</td>\n", " <td>4.9671</td>\n", " <td>2.0</td>\n", " <td>242.0</td>\n", " <td>17.8</td>\n", " <td>396.90</td>\n", " <td>9.14</td>\n", " </tr>\n", " <tr>\n", " 
<th>2</th>\n", " <td>0.02729</td>\n", " <td>0.0</td>\n", " <td>7.07</td>\n", " <td>0.0</td>\n", " <td>0.469</td>\n", " <td>7.185</td>\n", " <td>61.1</td>\n", " <td>4.9671</td>\n", " <td>2.0</td>\n", " <td>242.0</td>\n", " <td>17.8</td>\n", " <td>392.83</td>\n", " <td>4.03</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0.03237</td>\n", " <td>0.0</td>\n", " <td>2.18</td>\n", " <td>0.0</td>\n", " <td>0.458</td>\n", " <td>6.998</td>\n", " <td>45.8</td>\n", " <td>6.0622</td>\n", " <td>3.0</td>\n", " <td>222.0</td>\n", " <td>18.7</td>\n", " <td>394.63</td>\n", " <td>2.94</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>0.06905</td>\n", " <td>0.0</td>\n", " <td>2.18</td>\n", " <td>0.0</td>\n", " <td>0.458</td>\n", " <td>7.147</td>\n", " <td>54.2</td>\n", " <td>6.0622</td>\n", " <td>3.0</td>\n", " <td>222.0</td>\n", " <td>18.7</td>\n", " <td>396.90</td>\n", " <td>5.33</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", "\n", " PTRATIO B LSTAT \n", "0 15.3 396.90 4.98 \n", "1 17.8 396.90 9.14 \n", "2 17.8 392.83 4.03 \n", "3 18.7 394.63 2.94 \n", "4 18.7 396.90 5.33 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" 
class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Target</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>24.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>21.6</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>34.7</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>33.4</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>36.2</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Target\n", "0 24.0\n", "1 21.6\n", "2 34.7\n", "3 33.4\n", "4 36.2" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Preview the first five target values.\n",
"y_data.head(n=5)"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Train Test 데이터를 분리한다" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [],
"source": [
"# Hold out 20% of the rows as a test set; the fixed random_state keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_data,\n",
"    y_data,\n",
"    test_size=0.2,\n",
"    random_state=1,\n",
")"
] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((404, 13), (102, 13), (404, 1), (102, 1))" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Shapes of the four splits, in the order X_train, X_test, y_train, y_test.\n",
"tuple(split.shape for split in (X_train, X_test, y_train, y_test))"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### StandardScaler를 사용하여 스케일링한다" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "StandardScaler(copy=True, with_mean=True, with_std=True)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"# Keep the fitted scaler as an object so that the same mean and variance\n",
"# (learned from the training data) can be applied to the test data later.\n",
"scaler = StandardScaler().fit(X_train)\n",
"scaler"
] },
{ "cell_type": "code", "execution_count": 9, 
"metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CRIM</th>\n", " <th>ZN</th>\n", " <th>INDUS</th>\n", " <th>CHAS</th>\n", " <th>NOX</th>\n", " <th>RM</th>\n", " <th>AGE</th>\n", " <th>DIS</th>\n", " <th>RAD</th>\n", " <th>TAX</th>\n", " <th>PTRATIO</th>\n", " <th>B</th>\n", " <th>LSTAT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>42</th>\n", " <td>-0.386768</td>\n", " <td>-0.495593</td>\n", " <td>-0.609290</td>\n", " <td>-0.293294</td>\n", " <td>-0.899583</td>\n", " <td>-0.144968</td>\n", " <td>-2.150030</td>\n", " <td>0.894455</td>\n", " <td>-0.746330</td>\n", " <td>-1.008508</td>\n", " <td>-0.248578</td>\n", " <td>0.286742</td>\n", " <td>-0.966850</td>\n", " </tr>\n", " <tr>\n", " <th>58</th>\n", " <td>-0.385349</td>\n", " <td>0.579239</td>\n", " <td>-0.869526</td>\n", " <td>-0.293294</td>\n", " <td>-0.856756</td>\n", " <td>-0.179832</td>\n", " <td>-1.357820</td>\n", " <td>1.882903</td>\n", " <td>-0.169594</td>\n", " <td>-0.706413</td>\n", " <td>0.582147</td>\n", " <td>0.366695</td>\n", " <td>-0.821168</td>\n", " </tr>\n", " <tr>\n", " <th>385</th>\n", " <td>1.439108</td>\n", " <td>-0.495593</td>\n", " <td>1.026692</td>\n", " <td>-0.293294</td>\n", " <td>1.258877</td>\n", " <td>-1.440773</td>\n", " <td>1.057367</td>\n", " <td>-1.132950</td>\n", " <td>1.675959</td>\n", " <td>1.556337</td>\n", " <td>0.812904</td>\n", " <td>0.434727</td>\n", " <td>2.501775</td>\n", " </tr>\n", " <tr>\n", " <th>78</th>\n", " <td>-0.396082</td>\n", " <td>-0.495593</td>\n", " <td>0.256216</td>\n", " <td>-0.293294</td>\n", " <td>-0.993801</td>\n", " <td>-0.053448</td>\n", " 
<td>-0.499009</td>\n", " <td>0.560803</td>\n", " <td>-0.515636</td>\n", " <td>-0.031142</td>\n", " <td>0.120633</td>\n", " <td>0.319883</td>\n", " <td>-0.060845</td>\n", " </tr>\n", " <tr>\n", " <th>424</th>\n", " <td>0.560723</td>\n", " <td>-0.495593</td>\n", " <td>1.026692</td>\n", " <td>-0.293294</td>\n", " <td>0.265300</td>\n", " <td>-1.022396</td>\n", " <td>0.093395</td>\n", " <td>-0.832059</td>\n", " <td>1.675959</td>\n", " <td>1.556337</td>\n", " <td>0.812904</td>\n", " <td>-3.866459</td>\n", " <td>0.607906</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE \\\n", "42 -0.386768 -0.495593 -0.609290 -0.293294 -0.899583 -0.144968 -2.150030 \n", "58 -0.385349 0.579239 -0.869526 -0.293294 -0.856756 -0.179832 -1.357820 \n", "385 1.439108 -0.495593 1.026692 -0.293294 1.258877 -1.440773 1.057367 \n", "78 -0.396082 -0.495593 0.256216 -0.293294 -0.993801 -0.053448 -0.499009 \n", "424 0.560723 -0.495593 1.026692 -0.293294 0.265300 -1.022396 0.093395 \n", "\n", " DIS RAD TAX PTRATIO B LSTAT \n", "42 0.894455 -0.746330 -1.008508 -0.248578 0.286742 -0.966850 \n", "58 1.882903 -0.169594 -0.706413 0.582147 0.366695 -0.821168 \n", "385 -1.132950 1.675959 1.556337 0.812904 0.434727 2.501775 \n", "78 0.560803 -0.515636 -0.031142 0.120633 0.319883 -0.060845 \n", "424 -0.832059 1.675959 1.556337 0.812904 -3.866459 0.607906 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)\n", "X_train.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table 
border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>CRIM</th>\n", " <th>ZN</th>\n", " <th>INDUS</th>\n", " <th>CHAS</th>\n", " <th>NOX</th>\n", " <th>RM</th>\n", " <th>AGE</th>\n", " <th>DIS</th>\n", " <th>RAD</th>\n", " <th>TAX</th>\n", " <th>PTRATIO</th>\n", " <th>B</th>\n", " <th>LSTAT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>307</th>\n", " <td>-0.396864</td>\n", " <td>0.923185</td>\n", " <td>-1.300817</td>\n", " <td>-0.293294</td>\n", " <td>-0.694015</td>\n", " <td>0.842866</td>\n", " <td>0.082879</td>\n", " <td>-0.303729</td>\n", " <td>-0.284942</td>\n", " <td>-1.073665</td>\n", " <td>-0.017821</td>\n", " <td>0.434727</td>\n", " <td>-0.728209</td>\n", " </tr>\n", " <tr>\n", " <th>343</th>\n", " <td>-0.399481</td>\n", " <td>1.869037</td>\n", " <td>-1.066897</td>\n", " <td>-0.293294</td>\n", " <td>-0.591232</td>\n", " <td>0.620603</td>\n", " <td>-0.404365</td>\n", " <td>0.899742</td>\n", " <td>-0.515636</td>\n", " <td>-0.196998</td>\n", " <td>-0.387032</td>\n", " <td>0.434727</td>\n", " <td>-0.776769</td>\n", " </tr>\n", " <tr>\n", " <th>47</th>\n", " <td>-0.377155</td>\n", " <td>-0.495593</td>\n", " <td>-0.609290</td>\n", " <td>-0.293294</td>\n", " <td>-0.899583</td>\n", " <td>-0.346892</td>\n", " <td>0.615692</td>\n", " <td>0.879585</td>\n", " <td>-0.746330</td>\n", " <td>-1.008508</td>\n", " <td>-0.248578</td>\n", " <td>0.389227</td>\n", " <td>0.835448</td>\n", " </tr>\n", " <tr>\n", " <th>67</th>\n", " <td>-0.395926</td>\n", " <td>0.041823</td>\n", " <td>-0.732098</td>\n", " <td>-0.293294</td>\n", " <td>-1.233630</td>\n", " <td>-0.567702</td>\n", " <td>-1.631238</td>\n", " <td>1.261294</td>\n", " <td>-0.630983</td>\n", " <td>-0.345084</td>\n", " <td>0.212936</td>\n", " <td>0.427180</td>\n", " <td>-0.649124</td>\n", " </tr>\n", " <tr>\n", " <th>362</th>\n", " <td>0.000604</td>\n", " <td>-0.495593</td>\n", " <td>1.026692</td>\n", " <td>-0.293294</td>\n", " 
<td>1.858449</td>\n", " <td>-1.317293</td>\n", " <td>0.990765</td>\n", " <td>-0.813129</td>\n", " <td>1.675959</td>\n", " <td>1.556337</td>\n", " <td>0.812904</td>\n", " <td>0.258523</td>\n", " <td>-0.359147</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE \\\n", "307 -0.396864 0.923185 -1.300817 -0.293294 -0.694015 0.842866 0.082879 \n", "343 -0.399481 1.869037 -1.066897 -0.293294 -0.591232 0.620603 -0.404365 \n", "47 -0.377155 -0.495593 -0.609290 -0.293294 -0.899583 -0.346892 0.615692 \n", "67 -0.395926 0.041823 -0.732098 -0.293294 -1.233630 -0.567702 -1.631238 \n", "362 0.000604 -0.495593 1.026692 -0.293294 1.858449 -1.317293 0.990765 \n", "\n", " DIS RAD TAX PTRATIO B LSTAT \n", "307 -0.303729 -0.284942 -1.073665 -0.017821 0.434727 -0.728209 \n", "343 0.899742 -0.515636 -0.196998 -0.387032 0.434727 -0.776769 \n", "47 0.879585 -0.746330 -1.008508 -0.248578 0.389227 0.835448 \n", "67 1.261294 -0.630983 -0.345084 0.212936 0.427180 -0.649124 \n", "362 -0.813129 1.675959 1.556337 0.812904 0.258523 -0.359147 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Caution: never fit the scaler on the test data - only transform it with the\n",
"# scaler that was fitted on the training data.\n",
"X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)\n",
"X_test.head()"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Tensorflow에서 사용할 땐 Numpy 데이터 타입으로 사용할 예정이니 변환하자" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Convert all four splits to NumPy arrays, then show their types to confirm.\n",
"X_train, y_train, X_test, y_test = (\n",
"    np.array(split) for split in (X_train, y_train, X_test, y_test)\n",
")\n",
"tuple(type(split) for split in (X_train, y_train, X_test, y_test))"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Tensorflow Model 정의" ] },
{ "cell_type": 
"code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Learning Rate\n", "lr = 0.01\n", "\n", "# 가중치를 몇번 업데이트 할 것인가?\n", "epochs = 2000\n", "\n", "# Features 독립 변수\n", "X = tf.placeholder(dtype=tf.float32, shape=[None, X_train.shape[1]])\n", "# Labels 종속 변수\n", "y = tf.placeholder(dtype=tf.float32, shape=[None, 1])\n", "\n", "# Weight 가중치, 초기값은 정규분포에서 랜덤하게 뽑는다\n", "W = tf.Variable(tf.random_normal([X_train.shape[1], 1]))\n", "# Bias 초기값은 정규분포에서 랜덤하게 뽑는다\n", "b = tf.Variable(tf.random_normal([1]))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# tf.Variable을 사용했거나, 메서드 내부적으로 변수가 존재하는 경우에는 Variables\n", "# 초기화해줘야 한다.\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# 우리가 예측하는 값 W*X + b\n", "hypothesis = tf.add(tf.matmul(X, W), b)\n", "\n", "# cost function으로는 MSE를 사용\n", "cost = tf.reduce_mean(tf.square(y - hypothesis))\n", "\n", "# Gradient Descent 방법으로 최적화\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(cost)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# cost_history를 기록하면 마지막에 epoch 변화에 따른 cost 변화를 확인할 때 편리하다\n", "cost_history = np.empty(shape=[1], dtype=float)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch: 0, Error: 604.2737426757812\n", "Epoch: 100, Error: 33.710201263427734\n", "Epoch: 200, Error: 23.05824851989746\n", "Epoch: 300, Error: 22.47170639038086\n", "Epoch: 400, Error: 22.255775451660156\n", "Epoch: 500, Error: 22.13414764404297\n", "Epoch: 600, Error: 22.059106826782227\n", "Epoch: 700, Error: 22.009729385375977\n", "Epoch: 800, Error: 21.975486755371094\n", "Epoch: 900, Error: 21.950754165649414\n", "Epoch: 1000, Error: 21.932348251342773\n", "Epoch: 1100, Error: 21.918357849121094\n", "Epoch: 1200, Error: 
21.907577514648438\n", "Epoch: 1300, Error: 21.899185180664062\n", "Epoch: 1400, Error: 21.892616271972656\n", "Epoch: 1500, Error: 21.8874568939209\n", "Epoch: 1600, Error: 21.88338851928711\n", "Epoch: 1700, Error: 21.88017463684082\n", "Epoch: 1800, Error: 21.877634048461914\n", "Epoch: 1900, Error: 21.87563133239746\n", "Epoch: 2000, Error: 21.874052047729492\n" ] } ], "source": [ "with tf.Session() as sess:\n", " sess.run(init)\n", " \n", " for epoch in range(0, epochs):\n", " # optimizer에서 반환하는 값은 의미가 없으니 _로 받아주자\n", " _, err = sess.run([optimizer, cost], feed_dict={X: X_train, y: y_train})\n", " \n", " cost_history = np.append(cost_history, err)\n", " \n", " # 100 번에 한번씩 Error 변화를 확인하자\n", " if epoch%100 == 0:\n", " print('Epoch: {0}, Error: {1}'.format(epoch, err))\n", " \n", " print('Epoch: {0}, Error: {1}'.format(epoch + 1, err))\n", " \n", " # 우리가 설정한 Epochs만큼의 학습이 끝난 후에 나온 값을 확인하기 위해 받아두자\n", " updated_W = sess.run(W)\n", " updated_b = sess.run(b)\n", " \n", " # Test 데이터를 예측한 값\n", " y_pred = sess.run(hypothesis, feed_dict={X: X_test})\n", " \n", " # Mean Squared Error\n", " mse = sess.run(tf.reduce_mean(tf.square(y_pred - y_test)))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Trained Bias: \n", " [22.52223]\n", "Trained Weights: \n", " [[-1.0024459 ]\n", " [ 1.3155702 ]\n", " [ 0.05793529]\n", " [ 0.5868825 ]\n", " [-2.2774897 ]\n", " [ 2.1402504 ]\n", " [ 0.11897256]\n", " [-3.1695282 ]\n", " [ 2.4372222 ]\n", " [-1.6443572 ]\n", " [-2.1415503 ]\n", " [ 0.67585033]\n", " [-3.9236772 ]]\n" ] } ], "source": [ "# 최종 Bias\n", "print('Trained Bias: \\n', updated_b)\n", "print('Trained Weights: \\n', updated_W)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Squared Error: 23.407308966316197\n" ] } ], "source": [ "print('Mean Squared Error: ',mse)" ] }, { "cell_type": 
"code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "image/png": { "height": 280, "width": 404 } }, "output_type": "display_data" } ], "source": [ "plt.plot(range(len(cost_history)), cost_history)\n", "plt.axis([0,epochs,0,np.max(cost_history)])\n", "plt.xlabel('Epochs')\n", "plt.ylabel('Cost')\n", "plt.title('Cost 변화', fontsize=15)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "image/png": { "height": 280, "width": 384 } }, "output_type": "display_data" } ], "source": [ "plt.scatter(y_test, y_pred)\n", "plt.xlabel(u\"실제 집값\")\n", "plt.ylabel(u\"집값 예측치\")\n", "plt.title(\"집값 예측치와 실제 집값의 관계\", fontsize=15)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tensorflow Estimator API를 사용하여 Linear Regression하는 방법" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',\n", " 'PTRATIO', 'B', 'LSTAT'],\n", " dtype='object')" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_data.columns" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(13,)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.array(X_train).shape[1:]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "feat_cols = [tf.feature_column.numeric_column('x', shape=np.array(X_train).shape[1:])]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Make Feature Columns\n", "feat_cols = [tf.feature_column.numeric_column('x', 
shape=np.array(X_train).shape[1:])]\n", "# Make Input Function\n", "input_func = tf.estimator.inputs.numpy_input_fn({'x': X_train}, y_train, batch_size=1, num_epochs=2000, shuffle=True)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Using default config.\n", "WARNING:tensorflow:Using temporary folder as model directory: /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t\n", "INFO:tensorflow:Using config: {'_model_dir': '/var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x119148320>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n" ] } ], "source": [ "# Define Linear Regressor Model\n", "# Supported Optimizers: ('Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD')\n", "linear_model = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer='Adam')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# Set up Estimator Training Inputs\n", "train_input_func = tf.estimator.inputs.numpy_input_fn(X_train, y_train, batch_size=1, num_epochs=1000, shuffle=False)\n", "# Set up Estimator Test Inputs\n", "eval_input_func = tf.estimator.inputs.numpy_input_fn({'x': X_test}, y_test, batch_size=1, num_epochs=1, shuffle=False)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Calling 
model_fn.\n", "INFO:tensorflow:Done calling model_fn.\n", "INFO:tensorflow:Create CheckpointSaverHook.\n", "INFO:tensorflow:Graph was finalized.\n", "INFO:tensorflow:Running local_init_op.\n", "INFO:tensorflow:Done running local_init_op.\n", "INFO:tensorflow:Saving checkpoints for 0 into /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t/model.ckpt.\n", "INFO:tensorflow:loss = 225.0, step = 1\n", "INFO:tensorflow:global_step/sec: 890.828\n", "INFO:tensorflow:loss = 253.62346, step = 101 (0.114 sec)\n", "INFO:tensorflow:global_step/sec: 1038.08\n", "INFO:tensorflow:loss = 0.07589463, step = 201 (0.097 sec)\n", "INFO:tensorflow:global_step/sec: 1002.72\n", "INFO:tensorflow:loss = 2.1461585, step = 301 (0.100 sec)\n", "INFO:tensorflow:global_step/sec: 1143.59\n", "INFO:tensorflow:loss = 0.66625136, step = 401 (0.087 sec)\n", "INFO:tensorflow:global_step/sec: 1009.44\n", "INFO:tensorflow:loss = 21.957817, step = 501 (0.099 sec)\n", "INFO:tensorflow:global_step/sec: 988.006\n", "INFO:tensorflow:loss = 8.972441, step = 601 (0.101 sec)\n", "INFO:tensorflow:global_step/sec: 1050.7\n", "INFO:tensorflow:loss = 0.17537297, step = 701 (0.095 sec)\n", "INFO:tensorflow:global_step/sec: 1006.66\n", "INFO:tensorflow:loss = 6.7108874, step = 801 (0.100 sec)\n", "INFO:tensorflow:global_step/sec: 1124.59\n", "INFO:tensorflow:loss = 38.028217, step = 901 (0.089 sec)\n", "INFO:tensorflow:global_step/sec: 1075.72\n", "INFO:tensorflow:loss = 3.514638, step = 1001 (0.093 sec)\n", "INFO:tensorflow:global_step/sec: 1086.39\n", "INFO:tensorflow:loss = 0.009802317, step = 1101 (0.092 sec)\n", "INFO:tensorflow:global_step/sec: 1124.27\n", "INFO:tensorflow:loss = 6.4648666, step = 1201 (0.090 sec)\n", "INFO:tensorflow:global_step/sec: 1099.32\n", "INFO:tensorflow:loss = 1.2076899, step = 1301 (0.090 sec)\n", "INFO:tensorflow:global_step/sec: 1145.83\n", "INFO:tensorflow:loss = 21.732845, step = 1401 (0.087 sec)\n", "INFO:tensorflow:global_step/sec: 1134.79\n", "INFO:tensorflow:loss = 
0.6822102, step = 1501 (0.089 sec)\n", "INFO:tensorflow:global_step/sec: 1114.55\n", "INFO:tensorflow:loss = 31.725891, step = 1601 (0.090 sec)\n", "INFO:tensorflow:global_step/sec: 1090.22\n", "INFO:tensorflow:loss = 1.2142678, step = 1701 (0.092 sec)\n", "INFO:tensorflow:global_step/sec: 1036.86\n", "INFO:tensorflow:loss = 19.478325, step = 1801 (0.095 sec)\n", "INFO:tensorflow:global_step/sec: 1112.72\n", "INFO:tensorflow:loss = 62.885754, step = 1901 (0.090 sec)\n", "INFO:tensorflow:Saving checkpoints for 2000 into /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t/model.ckpt.\n", "INFO:tensorflow:Loss for final step: 55.036022.\n" ] }, { "data": { "text/plain": [ "<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x119148eb8>" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the Linear Regressor Estimator\n", "linear_model.train(input_fn=input_func, steps=2000)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Calling model_fn.\n", "INFO:tensorflow:Done calling model_fn.\n", "INFO:tensorflow:Starting evaluation at 2018-11-01-10:59:40\n", "INFO:tensorflow:Graph was finalized.\n", "INFO:tensorflow:Restoring parameters from /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t/model.ckpt-2000\n", "INFO:tensorflow:Running local_init_op.\n", "INFO:tensorflow:Done running local_init_op.\n", "INFO:tensorflow:Evaluation [10/100]\n", "INFO:tensorflow:Evaluation [20/100]\n", "INFO:tensorflow:Evaluation [30/100]\n", "INFO:tensorflow:Evaluation [40/100]\n", "INFO:tensorflow:Evaluation [50/100]\n", "INFO:tensorflow:Evaluation [60/100]\n", "INFO:tensorflow:Evaluation [70/100]\n", "INFO:tensorflow:Evaluation [80/100]\n", "INFO:tensorflow:Evaluation [90/100]\n", "INFO:tensorflow:Evaluation [100/100]\n", "INFO:tensorflow:Finished evaluation at 2018-11-01-10:59:40\n", "INFO:tensorflow:Saving dict 
for global step 2000: average_loss = 26.29743, global_step = 2000, loss = 26.29743\n", "INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2000: /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t/model.ckpt-2000\n" ] } ],
"source": [
"# Test the Model\n",
"# steps=None evaluates until eval_input_func is exhausted (it makes a single pass,\n",
"# num_epochs=1), so every test sample is scored. A fixed steps=100 with batch_size=1\n",
"# stops after 100 samples and silently ignores the rest of the test set.\n",
"test_metrics = linear_model.evaluate(input_fn=eval_input_func, steps=None)"
] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Calling model_fn.\n", "INFO:tensorflow:Done calling model_fn.\n", "INFO:tensorflow:Graph was finalized.\n", "INFO:tensorflow:Restoring parameters from /var/folders/gc/y94kqvf109v1_tthvbls56wc0000gn/T/tmp28oygs1t/model.ckpt-2000\n", "INFO:tensorflow:Running local_init_op.\n", "INFO:tensorflow:Done running local_init_op.\n" ] } ],
"source": [
"# Get Predicted Values as a list: keep the 'predictions' entry of every item\n",
"# yielded by predict() over the test input function\n",
"predicted_vals = [\n",
"    pred['predictions']\n",
"    for pred in linear_model.predict(input_fn=eval_input_func)\n",
"]"
] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "image/png": { "height": 280, "width": 384 } }, "output_type": "display_data" } ],
"source": [
"# Actual vs. predicted values on the test data (Estimator model)\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(y_test, predicted_vals)\n",
"ax.set_xlabel(u\"실제 집값\")\n",
"ax.set_ylabel(u\"집값 예측치\")\n",
"ax.set_title(\"집값 예측치와 실제 집값의 관계\", fontsize=15)\n",
"plt.show()"
] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }