{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Predicting House Prices on [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-02-14T19:15:45.685992Z", "start_time": "2019-02-14T19:15:42.969213Z" }, "attributes": { "classes": [], "id": "", "n": "3" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandas in /Users/muli/miniconda3/lib/python3.7/site-packages (0.24.1)\n", "Requirement already satisfied: numpy>=1.12.0 in /Users/muli/miniconda3/lib/python3.7/site-packages (from pandas) (1.14.6)\n", "Requirement already satisfied: pytz>=2011k in /Users/muli/miniconda3/lib/python3.7/site-packages (from pandas) (2018.9)\n", "Requirement already satisfied: python-dateutil>=2.5.0 in /Users/muli/miniconda3/lib/python3.7/site-packages (from pandas) (2.7.5)\n", "Requirement already satisfied: six>=1.5 in /Users/muli/miniconda3/lib/python3.7/site-packages (from python-dateutil>=2.5.0->pandas) (1.12.0)\n" ] } ], "source": [ "# Install pandas if it is missing; comment out the following line if pandas is already installed:\n", "!pip install pandas\n", "\n", "%matplotlib inline\n", "import d2l\n", "from mxnet import autograd, gluon, init, nd\n", "from mxnet.gluon import data as gdata, loss as gloss, nn, utils\n", "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Accessing and Reading Data Sets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-02-14T19:15:45.819464Z", "start_time": "2019-02-14T19:15:45.691844Z" }, "attributes": { "classes": [], "id": "", "n": "14" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading kaggle_house_pred_train.csv from 
https://github.com/d2l-ai/d2l-en/raw/master/data/kaggle_house_pred_train.csv...\n", "Downloading kaggle_house_pred_test.csv from https://github.com/d2l-ai/d2l-en/raw/master/data/kaggle_house_pred_test.csv...\n", "(1460, 81)\n", "(1459, 80)\n" ] } ], "source": [ "# Fetch the training CSV first, then the test CSV, into the working directory\n", "for data_url in ('https://github.com/d2l-ai/d2l-en/raw/master/data/kaggle_house_pred_train.csv',\n", "                 'https://github.com/d2l-ai/d2l-en/raw/master/data/kaggle_house_pred_test.csv'):\n", "    utils.download(data_url)\n", "\n", "# Load the two downloaded files into DataFrames\n", "train_data, test_data = (\n", "    pd.read_csv(csv_name)\n", "    for csv_name in ('kaggle_house_pred_train.csv', 'kaggle_house_pred_test.csv')\n", ")\n", "\n", "# Show (rows, columns) for the training set, then for the test set\n", "print(train_data.shape, test_data.shape, sep='\\n')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## A Quick View of the Data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-02-14T19:15:45.881547Z", "start_time": "2019-02-14T19:15:45.826648Z" }, "attributes": { "classes": [], "id": "", "n": "28" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | Id | \n", "MSSubClass | \n", "MSZoning | \n", "LotFrontage | \n", "SaleType | \n", "SaleCondition | \n", "SalePrice | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "60 | \n", "RL | \n", "65.0 | \n", "WD | \n", "Normal | \n", "208500 | \n", "
1 | \n", "2 | \n", "20 | \n", "RL | \n", "80.0 | \n", "WD | \n", "Normal | \n", "181500 | \n", "
2 | \n", "3 | \n", "60 | \n", "RL | \n", "68.0 | \n", "WD | \n", "Normal | \n", "223500 | \n", "
3 | \n", "4 | \n", "70 | \n", "RL | \n", "60.0 | \n", "WD | \n", "Abnorml | \n", "140000 | \n", "