Initial James Peret's iPython Notebook commit

I have been toying around with this for a while, and now I'm backing up all my files, so it's time to commit this code.

James Peret 7 years ago
commit
12c4ab6004

+ 284 - 0
.ipynb_checkpoints/adafruit-scraper-checkpoint.ipynb

@@ -0,0 +1,284 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Adafruit Scraper\n",
8
+    "\n",
9
+    "Quick script that loads a file with a list of product links from [Adafruit](https://www.adafruit.com), then scrapes each page to grab the title, price, and image URL, and finally writes all the data into a JSON file."
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "code",
14
+   "execution_count": 25,
15
+   "metadata": {},
16
+   "outputs": [
17
+    {
18
+     "data": {
19
+      "text/html": [
20
+       "<div>\n",
21
+       "<table border=\"1\" class=\"dataframe\">\n",
22
+       "  <thead>\n",
23
+       "    <tr style=\"text-align: right;\">\n",
24
+       "      <th></th>\n",
25
+       "      <th>url</th>\n",
26
+       "    </tr>\n",
27
+       "  </thead>\n",
28
+       "  <tbody>\n",
29
+       "    <tr>\n",
30
+       "      <th>0</th>\n",
31
+       "      <td>https://www.adafruit.com/product/1782</td>\n",
32
+       "    </tr>\n",
33
+       "    <tr>\n",
34
+       "      <th>1</th>\n",
35
+       "      <td>https://www.adafruit.com/product/1766</td>\n",
36
+       "    </tr>\n",
37
+       "    <tr>\n",
38
+       "      <th>2</th>\n",
39
+       "      <td>https://www.adafruit.com/product/2652</td>\n",
40
+       "    </tr>\n",
41
+       "    <tr>\n",
42
+       "      <th>3</th>\n",
43
+       "      <td>https://www.adafruit.com/product/189</td>\n",
44
+       "    </tr>\n",
45
+       "    <tr>\n",
46
+       "      <th>4</th>\n",
47
+       "      <td>https://www.adafruit.com/product/439</td>\n",
48
+       "    </tr>\n",
49
+       "  </tbody>\n",
50
+       "</table>\n",
51
+       "</div>"
52
+      ],
53
+      "text/plain": [
54
+       "                                     url\n",
55
+       "0  https://www.adafruit.com/product/1782\n",
56
+       "1  https://www.adafruit.com/product/1766\n",
57
+       "2  https://www.adafruit.com/product/2652\n",
58
+       "3   https://www.adafruit.com/product/189\n",
59
+       "4   https://www.adafruit.com/product/439"
60
+      ]
61
+     },
62
+     "execution_count": 25,
63
+     "metadata": {},
64
+     "output_type": "execute_result"
65
+    }
66
+   ],
67
+   "source": [
68
+    "# import Data\n",
69
+    "import pandas as pd\n",
70
+    "data = pd.read_csv('adafruit-links.txt', sep=\" \", header = None)\n",
71
+    "data.columns = [\"url\"]\n",
72
+    "data.head()  # Will show the DataFrame in Jupyter Notebooks"
73
+   ]
74
+  },
75
+  {
76
+   "cell_type": "code",
77
+   "execution_count": 26,
78
+   "metadata": {},
79
+   "outputs": [
80
+    {
81
+     "name": "stdout",
82
+     "output_type": "stream",
83
+     "text": [
84
+      "{'title': 'MCP9808 High Accuracy I2C Temperature Sensor Breakout Board', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1782', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1782-00.jpg'}\n",
85
+      "{'title': 'Fast Vibration Sensor Switch (Easy to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1766', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1766-00.jpg'}\n",
86
+      "{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
87
+      "{'title': 'PIR (motion) sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/189', 'image_url': 'https://cdn-shop.adafruit.com/970x728/189-00.jpg'}\n",
88
+      "{'title': 'Adafruit TSL2561 Digital Luminosity/Lux/Light Sensor Breakout', 'price': '5.95', 'url': 'https://www.adafruit.com/product/439', 'image_url': 'https://cdn-shop.adafruit.com/970x728/439-00.jpg'}\n",
89
+      "{'title': 'Medium Vibration Sensor Switch', 'price': '0.95', 'url': 'https://www.adafruit.com/product/2384', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2384-00.jpg'}\n",
90
+      "{'title': 'Membrane 3x4 Matrix Keypad + extras - 3x4', 'price': '3.95', 'url': 'https://www.adafruit.com/product/419', 'image_url': 'https://cdn-shop.adafruit.com/970x728/419-05.jpg'}\n",
91
+      "{'title': 'Slow Vibration Sensor Switch (Hard to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1767', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1767-00.jpg'}\n",
92
+      "{'title': 'Adafruit BMP280 I2C or SPI Barometric Pressure & Altitude Sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/2651', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2651-00.jpg'}\n",
93
+      "{'title': 'Adafruit Si7021 Temperature & Humidity Sensor Breakout Board', 'price': '6.95', 'url': 'https://www.adafruit.com/product/3251', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3251-00.jpg'}\n",
94
+      "{'title': 'Magnetic contact switch (door sensor)', 'price': '3.95', 'url': 'https://www.adafruit.com/product/375', 'image_url': 'https://cdn-shop.adafruit.com/970x728/375-00.jpg'}\n",
95
+      "{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
96
+      "{'title': 'Tilt ball switch', 'price': '2.00', 'url': 'https://www.adafruit.com/product/173', 'image_url': 'https://cdn-shop.adafruit.com/970x728/173-00.jpg'}\n",
97
+      "{'title': 'TMP36 - Analog Temperature sensor - TMP36', 'price': '1.50', 'url': 'https://www.adafruit.com/product/165', 'image_url': 'https://cdn-shop.adafruit.com/970x728/165-00.jpg'}\n",
98
+      "{'title': 'Mini 8-Way Rotary Selector Switch - SP8T', 'price': '1.95', 'url': 'https://www.adafruit.com/product/2925', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2925-00.jpg'}\n",
99
+      "{'title': 'Waterproof DS18B20 Digital temperature sensor + extras', 'price': '9.95', 'url': 'https://www.adafruit.com/product/381', 'image_url': 'https://cdn-shop.adafruit.com/970x728/381-00.jpg'}\n",
100
+      "{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
101
+      "{'title': 'Raspberry Pi Zero W Camera Pack - Includes Pi Zero W', 'price': '44.95', 'url': 'https://www.adafruit.com/product/3414', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3414-05.jpg'}\n",
102
+      "{'title': 'Mechanical Decade Counters - Small Size - Pack of 5', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1084', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1084-00.jpg'}\n",
103
+      "{'title': 'DHT11 basic temperature-humidity sensor + extras', 'price': '5.00', 'url': 'https://www.adafruit.com/product/386', 'image_url': 'https://cdn-shop.adafruit.com/970x728/386-00.jpg'}\n",
104
+      "{'title': 'Electroluminescent (EL) Panel - 10cm x 10cm Blue', 'price': '13.95', 'url': 'https://www.adafruit.com/product/624', 'image_url': 'https://cdn-shop.adafruit.com/970x728/624-00.jpg'}\n",
105
+      "{'title': 'IDC Breakout Helper - 2x20 (40 pin)', 'price': '2.25', 'url': 'https://www.adafruit.com/product/2270', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2270-04.jpg'}\n",
106
+      "{'title': 'Peltier Thermo-Electric Cooler Module - 12V 5A', 'price': '11.95', 'url': 'https://www.adafruit.com/product/1330', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1330-02.jpg'}\n",
107
+      "{'title': 'Membrane LED Keypad + extras', 'price': '2.95', 'url': 'https://www.adafruit.com/product/1333', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1333-00.jpg'}\n",
108
+      "{'title': 'Peltier Thermo-Electric Cooler Module - 5V 1A', 'price': '14.95', 'url': 'https://www.adafruit.com/product/1331', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1331-04.jpg'}\n",
109
+      "{'title': 'Adafruit Feather HUZZAH with ESP8266 WiFi', 'price': '16.95', 'url': 'https://www.adafruit.com/product/2821', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2821-01.jpg'}\n",
110
+      "{'title': 'Bi-Color (Red/Green) 24-Bar Bargraph w/I2C Backpack Kit', 'price': '9.95', 'url': 'https://www.adafruit.com/product/1721', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1721-00.jpg'}\n",
111
+      "{'title': '10 Segment Light Bar Graph LED Display - Blue - KWL-R1025BB', 'price': '1.95', 'url': 'https://www.adafruit.com/product/1815', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1815-04.jpg'}\n",
112
+      "{'title': 'Tower Light - Red Alert Light with Buzzer - 12VDC', 'price': '24.95', 'url': 'https://www.adafruit.com/product/2994', 'image_url': 'https://cdn-shop.adafruit.com/product-videos/1024x768/2994-01.jpg'}\n",
113
+      "{'title': 'Miniature 8x8 Red LED Matrix', 'price': '3.95', 'url': 'https://www.adafruit.com/product/454', 'image_url': 'https://cdn-shop.adafruit.com/970x728/454-04.jpg'}\n",
114
+      "{'title': 'Small 1.2\" 8x8 Bi-Color (Red/Green) Square LED Matrix', 'price': '7.95', 'url': 'https://www.adafruit.com/product/458', 'image_url': 'https://cdn-shop.adafruit.com/970x728/458-00.jpg'}\n",
115
+      "{'title': 'Stereo Bonnet Pack for Raspberry Pi Zero W - Includes Pi Zero W', 'price': '34.95', 'url': 'https://www.adafruit.com/product/3412', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3412-01.jpg'}\n",
116
+      "{'title': 'LED Illuminated Pushbutton - 30mm Square', 'price': '3.95', 'url': 'https://www.adafruit.com/product/491', 'image_url': 'https://cdn-shop.adafruit.com/970x728/491-00.jpg'}\n",
117
+      "{'title': 'Adafruit 128x64 OLED Bonnet for Raspberry Pi', 'price': '22.50', 'url': 'https://www.adafruit.com/product/3531', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3531-00.jpg'}\n",
118
+      "{'title': 'Adafruit Sensiron SHT31-D Temperature & Humidity Sensor Breakout', 'price': '13.95', 'url': 'https://www.adafruit.com/product/2857', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2857-04.jpg'}\n",
119
+      "{'title': 'Adafruit PiOLED - 128x32 Monochrome OLED Add-on for Raspberry Pi', 'price': '14.95', 'url': 'https://www.adafruit.com/product/3527', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3527-04.jpg'}\n",
120
+      "{'title': 'Micro B USB Cable with LCD Voltage / Current Display', 'price': '7.50', 'url': 'https://www.adafruit.com/product/3388', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3388-01.jpg'}\n",
121
+      "{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
122
+      "{'title': 'Adafruit I2S 3W Stereo Speaker Bonnet for Raspberry Pi - Mini Kit', 'price': '12.95', 'url': 'https://www.adafruit.com/product/3346', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3346-01.jpg'}\n"
123
+     ]
124
+    }
125
+   ],
126
+   "source": [
127
+    "import requests\n",
128
+    "from bs4 import BeautifulSoup\n",
129
+    "\n",
130
+    "list = []\n",
131
+    "\n",
132
+    "for a in data.url:\n",
133
+    "    result = requests.get(a)\n",
134
+    "    c = result.content\n",
135
+    "    soup = BeautifulSoup(c, \"html5lib\")\n",
136
+    "    title = soup.find_all(\"h1\")[0].string\n",
137
+    "    price = soup.find(itemprop=\"price\").get(\"content\")\n",
138
+    "    image_url = soup.find(itemprop=\"image\").get(\"src\")\n",
139
+    "    obj = {\n",
140
+    "        \"title\": title,\n",
141
+    "        \"price\": price,\n",
142
+    "        \"url\": a,\n",
143
+    "        \"image_url\": image_url\n",
144
+    "    }\n",
145
+    "    list.append(obj)\n",
146
+    "    print(obj)\n"
147
+   ]
148
+  },
149
+  {
150
+   "cell_type": "code",
151
+   "execution_count": 27,
152
+   "metadata": {},
153
+   "outputs": [
154
+    {
155
+     "data": {
156
+      "text/html": [
157
+       "<div>\n",
158
+       "<table border=\"1\" class=\"dataframe\">\n",
159
+       "  <thead>\n",
160
+       "    <tr style=\"text-align: right;\">\n",
161
+       "      <th></th>\n",
162
+       "      <th>0</th>\n",
163
+       "      <th>1</th>\n",
164
+       "      <th>2</th>\n",
165
+       "      <th>3</th>\n",
166
+       "      <th>4</th>\n",
167
+       "      <th>5</th>\n",
168
+       "      <th>6</th>\n",
169
+       "      <th>7</th>\n",
170
+       "      <th>8</th>\n",
171
+       "      <th>9</th>\n",
172
+       "      <th>...</th>\n",
173
+       "      <th>308</th>\n",
174
+       "      <th>309</th>\n",
175
+       "      <th>310</th>\n",
176
+       "      <th>311</th>\n",
177
+       "      <th>312</th>\n",
178
+       "      <th>313</th>\n",
179
+       "      <th>314</th>\n",
180
+       "      <th>315</th>\n",
181
+       "      <th>316</th>\n",
182
+       "      <th>317</th>\n",
183
+       "    </tr>\n",
184
+       "  </thead>\n",
185
+       "  <tbody>\n",
186
+       "    <tr>\n",
187
+       "      <th>0</th>\n",
188
+       "      <td>[{\"title\":</td>\n",
189
+       "      <td>MCP9808 High Accuracy I2C Temperature Sensor B...</td>\n",
190
+       "      <td>price:</td>\n",
191
+       "      <td>4.95,</td>\n",
192
+       "      <td>url:</td>\n",
193
+       "      <td>https://www.adafruit.com/product/1782,</td>\n",
194
+       "      <td>image_url:</td>\n",
195
+       "      <td>https://cdn-shop.adafruit.com/970x728/1782-00....</td>\n",
196
+       "      <td>{\"title\":</td>\n",
197
+       "      <td>Fast Vibration Sensor Switch (Easy to trigger),</td>\n",
198
+       "      <td>...</td>\n",
199
+       "      <td>image_url:</td>\n",
200
+       "      <td>https://cdn-shop.adafruit.com/970x728/2652-00....</td>\n",
201
+       "      <td>{\"title\":</td>\n",
202
+       "      <td>Adafruit I2S 3W Stereo Speaker Bonnet for Rasp...</td>\n",
203
+       "      <td>price:</td>\n",
204
+       "      <td>12.95,</td>\n",
205
+       "      <td>url:</td>\n",
206
+       "      <td>https://www.adafruit.com/product/3346,</td>\n",
207
+       "      <td>image_url:</td>\n",
208
+       "      <td>https://cdn-shop.adafruit.com/970x728/3346-01....</td>\n",
209
+       "    </tr>\n",
210
+       "  </tbody>\n",
211
+       "</table>\n",
212
+       "<p>1 rows × 318 columns</p>\n",
213
+       "</div>"
214
+      ],
215
+      "text/plain": [
216
+       "          0                                                  1       2    \\\n",
217
+       "0  [{\"title\":  MCP9808 High Accuracy I2C Temperature Sensor B...  price:   \n",
218
+       "\n",
219
+       "     3     4                                       5           6    \\\n",
220
+       "0  4.95,  url:  https://www.adafruit.com/product/1782,  image_url:   \n",
221
+       "\n",
222
+       "                                                 7          8    \\\n",
223
+       "0  https://cdn-shop.adafruit.com/970x728/1782-00....  {\"title\":   \n",
224
+       "\n",
225
+       "                                               9    \\\n",
226
+       "0  Fast Vibration Sensor Switch (Easy to trigger),   \n",
227
+       "\n",
228
+       "                         ...                                 308  \\\n",
229
+       "0                        ...                          image_url:   \n",
230
+       "\n",
231
+       "                                                 309        310  \\\n",
232
+       "0  https://cdn-shop.adafruit.com/970x728/2652-00....  {\"title\":   \n",
233
+       "\n",
234
+       "                                                 311     312     313   314  \\\n",
235
+       "0  Adafruit I2S 3W Stereo Speaker Bonnet for Rasp...  price:  12.95,  url:   \n",
236
+       "\n",
237
+       "                                      315         316  \\\n",
238
+       "0  https://www.adafruit.com/product/3346,  image_url:   \n",
239
+       "\n",
240
+       "                                                 317  \n",
241
+       "0  https://cdn-shop.adafruit.com/970x728/3346-01....  \n",
242
+       "\n",
243
+       "[1 rows x 318 columns]"
244
+      ]
245
+     },
246
+     "execution_count": 27,
247
+     "metadata": {},
248
+     "output_type": "execute_result"
249
+    }
250
+   ],
251
+   "source": [
252
+    "# Export Data\n",
253
+    "import json\n",
254
+    "#out = list.to_json(orient='records', lines=True)\n",
255
+    "out = json.dumps(list)\n",
256
+    "with open('adafruit-components.json', 'w') as f:\n",
257
+    "    f.write(out)\n",
258
+    "new_file = pd.read_csv('adafruit-components.json', sep=\" \", header = None)\n",
259
+    "new_file.head()"
260
+   ]
261
+  }
262
+ ],
263
+ "metadata": {
264
+  "kernelspec": {
265
+   "display_name": "Python 3",
266
+   "language": "python",
267
+   "name": "python3"
268
+  },
269
+  "language_info": {
270
+   "codemirror_mode": {
271
+    "name": "ipython",
272
+    "version": 3
273
+   },
274
+   "file_extension": ".py",
275
+   "mimetype": "text/x-python",
276
+   "name": "python",
277
+   "nbconvert_exporter": "python",
278
+   "pygments_lexer": "ipython3",
279
+   "version": "3.6.0"
280
+  }
281
+ },
282
+ "nbformat": 4,
283
+ "nbformat_minor": 2
284
+}

+ 6 - 0
.ipynb_checkpoints/example-plot-checkpoint.ipynb

@@ -0,0 +1,6 @@
1
+{
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 2
6
+}

+ 276 - 0
.ipynb_checkpoints/load-save-data-checkpoint.ipynb

@@ -0,0 +1,276 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Load and save data\n",
8
+    "\n",
9
+    "Exploring multiple ways of loading and saving data with Python."
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "markdown",
14
+   "metadata": {},
15
+   "source": [
16
+    "## Open a file and load data\n",
17
+    "\n",
18
+    "To load data from a file, use [pandas](https://pandas.pydata.org):"
19
+   ]
20
+  },
21
+  {
22
+   "cell_type": "code",
23
+   "execution_count": 4,
24
+   "metadata": {},
25
+   "outputs": [
26
+    {
27
+     "data": {
28
+      "text/html": [
29
+       "<div>\n",
30
+       "<table border=\"1\" class=\"dataframe\">\n",
31
+       "  <thead>\n",
32
+       "    <tr style=\"text-align: right;\">\n",
33
+       "      <th></th>\n",
34
+       "      <th>0</th>\n",
35
+       "    </tr>\n",
36
+       "  </thead>\n",
37
+       "  <tbody>\n",
38
+       "    <tr>\n",
39
+       "      <th>0</th>\n",
40
+       "      <td>https://www.adafruit.com/product/1782</td>\n",
41
+       "    </tr>\n",
42
+       "    <tr>\n",
43
+       "      <th>1</th>\n",
44
+       "      <td>https://www.adafruit.com/product/1766</td>\n",
45
+       "    </tr>\n",
46
+       "  </tbody>\n",
47
+       "</table>\n",
48
+       "</div>"
49
+      ],
50
+      "text/plain": [
51
+       "                                       0\n",
52
+       "0  https://www.adafruit.com/product/1782\n",
53
+       "1  https://www.adafruit.com/product/1766"
54
+      ]
55
+     },
56
+     "execution_count": 4,
57
+     "metadata": {},
58
+     "output_type": "execute_result"
59
+    }
60
+   ],
61
+   "source": [
62
+    "import pandas as pd\n",
63
+    "data = pd.read_csv('example-data.txt', sep=\" \", header = None)\n",
64
+    "data.head()  # Will show the DataFrame in Jupyter Notebooks"
65
+   ]
66
+  },
67
+  {
68
+   "cell_type": "code",
69
+   "execution_count": 5,
70
+   "metadata": {},
71
+   "outputs": [
72
+    {
73
+     "data": {
74
+      "text/plain": [
75
+       "'https://www.adafruit.com/product/1782'"
76
+      ]
77
+     },
78
+     "execution_count": 5,
79
+     "metadata": {},
80
+     "output_type": "execute_result"
81
+    }
82
+   ],
83
+   "source": [
84
+    "# Label columns from the dataset\n",
85
+    "data.columns = [\"url\"]\n",
86
+    "data.url[0]"
87
+   ]
88
+  },
89
+  {
90
+   "cell_type": "markdown",
91
+   "metadata": {},
92
+   "source": [
93
+    "More information on working with text data in pandas can be found [here](https://pandas.pydata.org/pandas-docs/stable/text.html)"
94
+   ]
95
+  },
96
+  {
97
+   "cell_type": "markdown",
98
+   "metadata": {},
99
+   "source": [
100
+    "## Save data to a TXT file\n",
101
+    "\n",
102
+    "To save data to a file like TXT or any other type, use the code snippet below. Remember that data in each file type needs its own structure. For example, a CSV file needs commas to separate the data.\n",
103
+    "\n",
104
+    "When saving a *pandas* dataset directly to a file like this, each row is written as one line and the columns are separated by commas (the `to_csv` default; pass `sep` to use a different delimiter)."
105
+   ]
106
+  },
107
+  {
108
+   "cell_type": "code",
109
+   "execution_count": 11,
110
+   "metadata": {},
111
+   "outputs": [],
112
+   "source": [
113
+    "filename = 'example-data.txt'\n",
114
+    "data.url.to_csv(filename, index=False, encoding='utf-8')"
115
+   ]
116
+  },
117
+  {
118
+   "cell_type": "markdown",
119
+   "metadata": {},
120
+   "source": [
121
+    "To test if the file was saved:"
122
+   ]
123
+  },
124
+  {
125
+   "cell_type": "code",
126
+   "execution_count": 10,
127
+   "metadata": {},
128
+   "outputs": [
129
+    {
130
+     "data": {
131
+      "text/html": [
132
+       "<div>\n",
133
+       "<table border=\"1\" class=\"dataframe\">\n",
134
+       "  <thead>\n",
135
+       "    <tr style=\"text-align: right;\">\n",
136
+       "      <th></th>\n",
137
+       "      <th>0</th>\n",
138
+       "    </tr>\n",
139
+       "  </thead>\n",
140
+       "  <tbody>\n",
141
+       "    <tr>\n",
142
+       "      <th>0</th>\n",
143
+       "      <td>https://www.adafruit.com/product/1782</td>\n",
144
+       "    </tr>\n",
145
+       "    <tr>\n",
146
+       "      <th>1</th>\n",
147
+       "      <td>https://www.adafruit.com/product/1766</td>\n",
148
+       "    </tr>\n",
149
+       "  </tbody>\n",
150
+       "</table>\n",
151
+       "</div>"
152
+      ],
153
+      "text/plain": [
154
+       "                                       0\n",
155
+       "0  https://www.adafruit.com/product/1782\n",
156
+       "1  https://www.adafruit.com/product/1766"
157
+      ]
158
+     },
159
+     "execution_count": 10,
160
+     "metadata": {},
161
+     "output_type": "execute_result"
162
+    }
163
+   ],
164
+   "source": [
165
+    "saved_data = pd.read_csv(filename, sep=\" \", header = None)\n",
166
+    "saved_data.head()"
167
+   ]
168
+  },
169
+  {
170
+   "cell_type": "markdown",
171
+   "metadata": {},
172
+   "source": [
173
+    "## Save data to a JSON file\n",
174
+    "\n",
175
+    "To save data to a JSON file, use the [pandas to_json function](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html)"
176
+   ]
177
+  },
178
+  {
179
+   "cell_type": "code",
180
+   "execution_count": 7,
181
+   "metadata": {
182
+    "collapsed": true,
183
+    "scrolled": true
184
+   },
185
+   "outputs": [],
186
+   "source": [
187
+    "import json\n",
188
+    "out = data.url.to_json(orient='records', lines=True)\n",
189
+    "with open('example-data.json', 'w') as f:\n",
190
+    "    f.write(out)"
191
+   ]
192
+  },
193
+  {
194
+   "cell_type": "markdown",
195
+   "metadata": {},
196
+   "source": [
197
+    "Now let's open this file and check if the data is valid:"
198
+   ]
199
+  },
200
+  {
201
+   "cell_type": "code",
202
+   "execution_count": 9,
203
+   "metadata": {},
204
+   "outputs": [
205
+    {
206
+     "data": {
207
+      "text/html": [
208
+       "<div>\n",
209
+       "<table border=\"1\" class=\"dataframe\">\n",
210
+       "  <thead>\n",
211
+       "    <tr style=\"text-align: right;\">\n",
212
+       "      <th></th>\n",
213
+       "      <th>url</th>\n",
214
+       "    </tr>\n",
215
+       "  </thead>\n",
216
+       "  <tbody>\n",
217
+       "    <tr>\n",
218
+       "      <th>0</th>\n",
219
+       "      <td>https://www.adafruit.com/product/1782</td>\n",
220
+       "    </tr>\n",
221
+       "    <tr>\n",
222
+       "      <th>1</th>\n",
223
+       "      <td>https://www.adafruit.com/product/1766</td>\n",
224
+       "    </tr>\n",
225
+       "  </tbody>\n",
226
+       "</table>\n",
227
+       "</div>"
228
+      ],
229
+      "text/plain": [
230
+       "                                     url\n",
231
+       "0  https://www.adafruit.com/product/1782\n",
232
+       "1  https://www.adafruit.com/product/1766"
233
+      ]
234
+     },
235
+     "execution_count": 9,
236
+     "metadata": {},
237
+     "output_type": "execute_result"
238
+    }
239
+   ],
240
+   "source": [
241
+    "new_file = pd.read_csv('example-data.json', sep=\" \", header = None)\n",
242
+    "data.head()"
243
+   ]
244
+  },
245
+  {
246
+   "cell_type": "code",
247
+   "execution_count": null,
248
+   "metadata": {
249
+    "collapsed": true
250
+   },
251
+   "outputs": [],
252
+   "source": []
253
+  }
254
+ ],
255
+ "metadata": {
256
+  "kernelspec": {
257
+   "display_name": "Python 3",
258
+   "language": "python",
259
+   "name": "python3"
260
+  },
261
+  "language_info": {
262
+   "codemirror_mode": {
263
+    "name": "ipython",
264
+    "version": 3
265
+   },
266
+   "file_extension": ".py",
267
+   "mimetype": "text/x-python",
268
+   "name": "python",
269
+   "nbconvert_exporter": "python",
270
+   "pygments_lexer": "ipython3",
271
+   "version": "3.6.0"
272
+  }
273
+ },
274
+ "nbformat": 4,
275
+ "nbformat_minor": 2
276
+}

+ 153 - 0
.ipynb_checkpoints/scraping-example-checkpoint.ipynb

@@ -0,0 +1,153 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "code",
5
+   "execution_count": 45,
6
+   "metadata": {
7
+    "collapsed": true
8
+   },
9
+   "outputs": [],
10
+   "source": [
11
+    "from IPython.display import Image\n",
12
+    "from IPython.core.display import HTML "
13
+   ]
14
+  },
15
+  {
16
+   "cell_type": "code",
17
+   "execution_count": 35,
18
+   "metadata": {},
19
+   "outputs": [
20
+    {
21
+     "data": {
22
+      "text/plain": [
23
+       "200"
24
+      ]
25
+     },
26
+     "execution_count": 35,
27
+     "metadata": {},
28
+     "output_type": "execute_result"
29
+    }
30
+   ],
31
+   "source": [
32
+    "import requests\n",
33
+    "result = requests.get(\"https://www.adafruit.com/product/1782\")\n",
34
+    "c = result.content\n",
35
+    "result.status_code"
36
+   ]
37
+  },
38
+  {
39
+   "cell_type": "code",
40
+   "execution_count": 41,
41
+   "metadata": {},
42
+   "outputs": [
43
+    {
44
+     "name": "stdout",
45
+     "output_type": "stream",
46
+     "text": [
47
+      "MCP9808 High Accuracy I2C Temperature Sensor Breakout Board\n"
48
+     ]
49
+    }
50
+   ],
51
+   "source": [
52
+    "from bs4 import BeautifulSoup\n",
53
+    "soup = BeautifulSoup(c, \"html5lib\")\n",
54
+    "title = soup.find_all(\"h1\")[0].string\n",
55
+    "print(title)"
56
+   ]
57
+  },
58
+  {
59
+   "cell_type": "code",
60
+   "execution_count": 42,
61
+   "metadata": {},
62
+   "outputs": [
63
+    {
64
+     "name": "stdout",
65
+     "output_type": "stream",
66
+     "text": [
67
+      "4.95\n"
68
+     ]
69
+    }
70
+   ],
71
+   "source": [
72
+    "price = soup.find(itemprop=\"price\").get(\"content\")\n",
73
+    "print(price)"
74
+   ]
75
+  },
76
+  {
77
+   "cell_type": "code",
78
+   "execution_count": 47,
79
+   "metadata": {},
80
+   "outputs": [
81
+    {
82
+     "data": {
83
+      "text/html": [
84
+       "<img src=\"https://cdn-shop.adafruit.com/970x728/1782-00.jpg\" width=\"100\" height=\"100\"/>"
85
+      ],
86
+      "text/plain": [
87
+       "<IPython.core.display.Image object>"
88
+      ]
89
+     },
90
+     "execution_count": 47,
91
+     "metadata": {},
92
+     "output_type": "execute_result"
93
+    }
94
+   ],
95
+   "source": [
96
+    "image_url = soup.find(itemprop=\"image\").get(\"src\")\n",
97
+    "Image(url = image_url, width=100, height=100)"
98
+   ]
99
+  },
100
+  {
101
+   "cell_type": "code",
102
+   "execution_count": 39,
103
+   "metadata": {},
104
+   "outputs": [
105
+    {
106
+     "name": "stdout",
107
+     "output_type": "stream",
108
+     "text": [
109
+      "{'title': 'MCP9808 High Accuracy I2C Temperature Sensor Breakout Board', 'price': '4.95', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1782-00.jpg'}\n"
110
+     ]
111
+    }
112
+   ],
113
+   "source": [
114
+    "obj = {\n",
115
+    "    \"title\": title,\n",
116
+    "    \"price\": price,\n",
117
+    "    \"image_url\": image_url\n",
118
+    "}\n",
119
+    "print(obj)"
120
+   ]
121
+  },
122
+  {
123
+   "cell_type": "code",
124
+   "execution_count": null,
125
+   "metadata": {
126
+    "collapsed": true
127
+   },
128
+   "outputs": [],
129
+   "source": []
130
+  }
131
+ ],
132
+ "metadata": {
133
+  "kernelspec": {
134
+   "display_name": "Python 3",
135
+   "language": "python",
136
+   "name": "python3"
137
+  },
138
+  "language_info": {
139
+   "codemirror_mode": {
140
+    "name": "ipython",
141
+    "version": 3
142
+   },
143
+   "file_extension": ".py",
144
+   "mimetype": "text/x-python",
145
+   "name": "python",
146
+   "nbconvert_exporter": "python",
147
+   "pygments_lexer": "ipython3",
148
+   "version": "3.6.0"
149
+  }
150
+ },
151
+ "nbformat": 4,
152
+ "nbformat_minor": 2
153
+}

+ 1 - 0
adafruit-components.json

@@ -0,0 +1 @@
1
+[{"title": "MCP9808 High Accuracy I2C Temperature Sensor Breakout Board", "price": "4.95", "url": "https://www.adafruit.com/product/1782", "image_url": "https://cdn-shop.adafruit.com/970x728/1782-00.jpg"}, {"title": "Fast Vibration Sensor Switch (Easy to trigger)", "price": "0.95", "url": "https://www.adafruit.com/product/1766", "image_url": "https://cdn-shop.adafruit.com/970x728/1766-00.jpg"}, {"title": "Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor", "price": "19.95", "url": "https://www.adafruit.com/product/2652", "image_url": "https://cdn-shop.adafruit.com/970x728/2652-00.jpg"}, {"title": "PIR (motion) sensor", "price": "9.95", "url": "https://www.adafruit.com/product/189", "image_url": "https://cdn-shop.adafruit.com/970x728/189-00.jpg"}, {"title": "Adafruit TSL2561 Digital Luminosity/Lux/Light Sensor Breakout", "price": "5.95", "url": "https://www.adafruit.com/product/439", "image_url": "https://cdn-shop.adafruit.com/970x728/439-00.jpg"}, {"title": "Medium Vibration Sensor Switch", "price": "0.95", "url": "https://www.adafruit.com/product/2384", "image_url": "https://cdn-shop.adafruit.com/970x728/2384-00.jpg"}, {"title": "Membrane 3x4 Matrix Keypad + extras - 3x4", "price": "3.95", "url": "https://www.adafruit.com/product/419", "image_url": "https://cdn-shop.adafruit.com/970x728/419-05.jpg"}, {"title": "Slow Vibration Sensor Switch (Hard to trigger)", "price": "0.95", "url": "https://www.adafruit.com/product/1767", "image_url": "https://cdn-shop.adafruit.com/970x728/1767-00.jpg"}, {"title": "Adafruit BMP280 I2C or SPI Barometric Pressure & Altitude Sensor", "price": "9.95", "url": "https://www.adafruit.com/product/2651", "image_url": "https://cdn-shop.adafruit.com/970x728/2651-00.jpg"}, {"title": "Adafruit Si7021 Temperature & Humidity Sensor Breakout Board", "price": "6.95", "url": "https://www.adafruit.com/product/3251", "image_url": "https://cdn-shop.adafruit.com/970x728/3251-00.jpg"}, {"title": "Magnetic contact switch (door sensor)", 
"price": "3.95", "url": "https://www.adafruit.com/product/375", "image_url": "https://cdn-shop.adafruit.com/970x728/375-00.jpg"}, {"title": "DS18B20 Digital temperature sensor + extras", "price": "3.95", "url": "https://www.adafruit.com/product/374", "image_url": "https://cdn-shop.adafruit.com/970x728/374-00.jpg"}, {"title": "Tilt ball switch", "price": "2.00", "url": "https://www.adafruit.com/product/173", "image_url": "https://cdn-shop.adafruit.com/970x728/173-00.jpg"}, {"title": "TMP36 - Analog Temperature sensor - TMP36", "price": "1.50", "url": "https://www.adafruit.com/product/165", "image_url": "https://cdn-shop.adafruit.com/970x728/165-00.jpg"}, {"title": "Mini 8-Way Rotary Selector Switch - SP8T", "price": "1.95", "url": "https://www.adafruit.com/product/2925", "image_url": "https://cdn-shop.adafruit.com/970x728/2925-00.jpg"}, {"title": "Waterproof DS18B20 Digital temperature sensor + extras", "price": "9.95", "url": "https://www.adafruit.com/product/381", "image_url": "https://cdn-shop.adafruit.com/970x728/381-00.jpg"}, {"title": "DS18B20 Digital temperature sensor + extras", "price": "3.95", "url": "https://www.adafruit.com/product/374", "image_url": "https://cdn-shop.adafruit.com/970x728/374-00.jpg"}, {"title": "Raspberry Pi Zero W Camera Pack - Includes Pi Zero W", "price": "44.95", "url": "https://www.adafruit.com/product/3414", "image_url": "https://cdn-shop.adafruit.com/970x728/3414-05.jpg"}, {"title": "Mechanical Decade Counters - Small Size - Pack of 5", "price": "4.95", "url": "https://www.adafruit.com/product/1084", "image_url": "https://cdn-shop.adafruit.com/970x728/1084-00.jpg"}, {"title": "DHT11 basic temperature-humidity sensor + extras", "price": "5.00", "url": "https://www.adafruit.com/product/386", "image_url": "https://cdn-shop.adafruit.com/970x728/386-00.jpg"}, {"title": "Electroluminescent (EL) Panel - 10cm x 10cm Blue", "price": "13.95", "url": "https://www.adafruit.com/product/624", "image_url": 
"https://cdn-shop.adafruit.com/970x728/624-00.jpg"}, {"title": "IDC Breakout Helper - 2x20 (40 pin)", "price": "2.25", "url": "https://www.adafruit.com/product/2270", "image_url": "https://cdn-shop.adafruit.com/970x728/2270-04.jpg"}, {"title": "Peltier Thermo-Electric Cooler Module - 12V 5A", "price": "11.95", "url": "https://www.adafruit.com/product/1330", "image_url": "https://cdn-shop.adafruit.com/970x728/1330-02.jpg"}, {"title": "Membrane LED Keypad + extras", "price": "2.95", "url": "https://www.adafruit.com/product/1333", "image_url": "https://cdn-shop.adafruit.com/970x728/1333-00.jpg"}, {"title": "Peltier Thermo-Electric Cooler Module - 5V 1A", "price": "14.95", "url": "https://www.adafruit.com/product/1331", "image_url": "https://cdn-shop.adafruit.com/970x728/1331-04.jpg"}, {"title": "Adafruit Feather HUZZAH with ESP8266 WiFi", "price": "16.95", "url": "https://www.adafruit.com/product/2821", "image_url": "https://cdn-shop.adafruit.com/970x728/2821-01.jpg"}, {"title": "Bi-Color (Red/Green) 24-Bar Bargraph w/I2C Backpack Kit", "price": "9.95", "url": "https://www.adafruit.com/product/1721", "image_url": "https://cdn-shop.adafruit.com/970x728/1721-00.jpg"}, {"title": "10 Segment Light Bar Graph LED Display - Blue - KWL-R1025BB", "price": "1.95", "url": "https://www.adafruit.com/product/1815", "image_url": "https://cdn-shop.adafruit.com/970x728/1815-04.jpg"}, {"title": "Tower Light - Red Alert Light with Buzzer - 12VDC", "price": "24.95", "url": "https://www.adafruit.com/product/2994", "image_url": "https://cdn-shop.adafruit.com/product-videos/1024x768/2994-01.jpg"}, {"title": "Miniature 8x8 Red LED Matrix", "price": "3.95", "url": "https://www.adafruit.com/product/454", "image_url": "https://cdn-shop.adafruit.com/970x728/454-04.jpg"}, {"title": "Small 1.2\" 8x8 Bi-Color (Red/Green) Square LED Matrix", "price": "7.95", "url": "https://www.adafruit.com/product/458", "image_url": "https://cdn-shop.adafruit.com/970x728/458-00.jpg"}, {"title": "Stereo Bonnet Pack 
for Raspberry Pi Zero W - Includes Pi Zero W", "price": "34.95", "url": "https://www.adafruit.com/product/3412", "image_url": "https://cdn-shop.adafruit.com/970x728/3412-01.jpg"}, {"title": "LED Illuminated Pushbutton - 30mm Square", "price": "3.95", "url": "https://www.adafruit.com/product/491", "image_url": "https://cdn-shop.adafruit.com/970x728/491-00.jpg"}, {"title": "Adafruit 128x64 OLED Bonnet for Raspberry Pi", "price": "22.50", "url": "https://www.adafruit.com/product/3531", "image_url": "https://cdn-shop.adafruit.com/970x728/3531-00.jpg"}, {"title": "Adafruit Sensiron SHT31-D Temperature & Humidity Sensor Breakout", "price": "13.95", "url": "https://www.adafruit.com/product/2857", "image_url": "https://cdn-shop.adafruit.com/970x728/2857-04.jpg"}, {"title": "Adafruit PiOLED - 128x32 Monochrome OLED Add-on for Raspberry Pi", "price": "14.95", "url": "https://www.adafruit.com/product/3527", "image_url": "https://cdn-shop.adafruit.com/970x728/3527-04.jpg"}, {"title": "Micro B USB Cable with LCD Voltage / Current Display", "price": "7.50", "url": "https://www.adafruit.com/product/3388", "image_url": "https://cdn-shop.adafruit.com/970x728/3388-01.jpg"}, {"title": "Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor", "price": "19.95", "url": "https://www.adafruit.com/product/2652", "image_url": "https://cdn-shop.adafruit.com/970x728/2652-00.jpg"}, {"title": "Adafruit I2S 3W Stereo Speaker Bonnet for Raspberry Pi - Mini Kit", "price": "12.95", "url": "https://www.adafruit.com/product/3346", "image_url": "https://cdn-shop.adafruit.com/970x728/3346-01.jpg"}]

+ 39 - 0
adafruit-links.txt

@@ -0,0 +1,39 @@
1
+https://www.adafruit.com/product/1782
2
+https://www.adafruit.com/product/1766
3
+https://www.adafruit.com/product/2652
4
+https://www.adafruit.com/product/189
5
+https://www.adafruit.com/product/439
6
+https://www.adafruit.com/product/2384
7
+https://www.adafruit.com/product/419
8
+https://www.adafruit.com/product/1767
9
+https://www.adafruit.com/product/2651
10
+https://www.adafruit.com/product/3251
11
+https://www.adafruit.com/product/375
12
+https://www.adafruit.com/product/374
13
+https://www.adafruit.com/product/173
14
+https://www.adafruit.com/product/165
15
+https://www.adafruit.com/product/2925
16
+https://www.adafruit.com/product/381
17
+https://www.adafruit.com/product/374
18
+https://www.adafruit.com/product/3414
19
+https://www.adafruit.com/product/1084
20
+https://www.adafruit.com/product/386
21
+https://www.adafruit.com/product/624
22
+https://www.adafruit.com/product/2270
23
+https://www.adafruit.com/product/1330
24
+https://www.adafruit.com/product/1333
25
+https://www.adafruit.com/product/1331
26
+https://www.adafruit.com/product/2821
27
+https://www.adafruit.com/product/1721
28
+https://www.adafruit.com/product/1815
29
+https://www.adafruit.com/product/2994
30
+https://www.adafruit.com/product/454
31
+https://www.adafruit.com/product/458
32
+https://www.adafruit.com/product/3412
33
+https://www.adafruit.com/product/491
34
+https://www.adafruit.com/product/3531
35
+https://www.adafruit.com/product/2857
36
+https://www.adafruit.com/product/3527
37
+https://www.adafruit.com/product/3388
38
+https://www.adafruit.com/product/2652
39
+https://www.adafruit.com/product/3346

+ 284 - 0
adafruit-scraper.ipynb

@@ -0,0 +1,284 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Adafruit Scraper\n",
8
+    "\n",
9
+    "Quick script that loads a file with a list of product links from [Adafruit](http://www.adafruit.com), then scrapes each page to grab the title, price and image URL, and finally writes all the data into a JSON file."
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "code",
14
+   "execution_count": 25,
15
+   "metadata": {},
16
+   "outputs": [
17
+    {
18
+     "data": {
19
+      "text/html": [
20
+       "<div>\n",
21
+       "<table border=\"1\" class=\"dataframe\">\n",
22
+       "  <thead>\n",
23
+       "    <tr style=\"text-align: right;\">\n",
24
+       "      <th></th>\n",
25
+       "      <th>url</th>\n",
26
+       "    </tr>\n",
27
+       "  </thead>\n",
28
+       "  <tbody>\n",
29
+       "    <tr>\n",
30
+       "      <th>0</th>\n",
31
+       "      <td>https://www.adafruit.com/product/1782</td>\n",
32
+       "    </tr>\n",
33
+       "    <tr>\n",
34
+       "      <th>1</th>\n",
35
+       "      <td>https://www.adafruit.com/product/1766</td>\n",
36
+       "    </tr>\n",
37
+       "    <tr>\n",
38
+       "      <th>2</th>\n",
39
+       "      <td>https://www.adafruit.com/product/2652</td>\n",
40
+       "    </tr>\n",
41
+       "    <tr>\n",
42
+       "      <th>3</th>\n",
43
+       "      <td>https://www.adafruit.com/product/189</td>\n",
44
+       "    </tr>\n",
45
+       "    <tr>\n",
46
+       "      <th>4</th>\n",
47
+       "      <td>https://www.adafruit.com/product/439</td>\n",
48
+       "    </tr>\n",
49
+       "  </tbody>\n",
50
+       "</table>\n",
51
+       "</div>"
52
+      ],
53
+      "text/plain": [
54
+       "                                     url\n",
55
+       "0  https://www.adafruit.com/product/1782\n",
56
+       "1  https://www.adafruit.com/product/1766\n",
57
+       "2  https://www.adafruit.com/product/2652\n",
58
+       "3   https://www.adafruit.com/product/189\n",
59
+       "4   https://www.adafruit.com/product/439"
60
+      ]
61
+     },
62
+     "execution_count": 25,
63
+     "metadata": {},
64
+     "output_type": "execute_result"
65
+    }
66
+   ],
67
+   "source": [
68
+    "# import Data\n",
69
+    "import pandas as pd\n",
70
+    "data = pd.read_csv('adafruit-links.txt', sep=\" \", header = None)\n",
71
+    "data.columns = [\"url\"]\n",
72
+    "data.head()  # Will show the DataFrame in Jupyter Notebooks"
73
+   ]
74
+  },
75
+  {
76
+   "cell_type": "code",
77
+   "execution_count": 26,
78
+   "metadata": {},
79
+   "outputs": [
80
+    {
81
+     "name": "stdout",
82
+     "output_type": "stream",
83
+     "text": [
84
+      "{'title': 'MCP9808 High Accuracy I2C Temperature Sensor Breakout Board', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1782', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1782-00.jpg'}\n",
85
+      "{'title': 'Fast Vibration Sensor Switch (Easy to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1766', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1766-00.jpg'}\n",
86
+      "{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
87
+      "{'title': 'PIR (motion) sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/189', 'image_url': 'https://cdn-shop.adafruit.com/970x728/189-00.jpg'}\n",
88
+      "{'title': 'Adafruit TSL2561 Digital Luminosity/Lux/Light Sensor Breakout', 'price': '5.95', 'url': 'https://www.adafruit.com/product/439', 'image_url': 'https://cdn-shop.adafruit.com/970x728/439-00.jpg'}\n",
89
+      "{'title': 'Medium Vibration Sensor Switch', 'price': '0.95', 'url': 'https://www.adafruit.com/product/2384', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2384-00.jpg'}\n",
90
+      "{'title': 'Membrane 3x4 Matrix Keypad + extras - 3x4', 'price': '3.95', 'url': 'https://www.adafruit.com/product/419', 'image_url': 'https://cdn-shop.adafruit.com/970x728/419-05.jpg'}\n",
91
+      "{'title': 'Slow Vibration Sensor Switch (Hard to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1767', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1767-00.jpg'}\n",
92
+      "{'title': 'Adafruit BMP280 I2C or SPI Barometric Pressure & Altitude Sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/2651', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2651-00.jpg'}\n",
93
+      "{'title': 'Adafruit Si7021 Temperature & Humidity Sensor Breakout Board', 'price': '6.95', 'url': 'https://www.adafruit.com/product/3251', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3251-00.jpg'}\n",
94
+      "{'title': 'Magnetic contact switch (door sensor)', 'price': '3.95', 'url': 'https://www.adafruit.com/product/375', 'image_url': 'https://cdn-shop.adafruit.com/970x728/375-00.jpg'}\n",
95
+      "{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
96
+      "{'title': 'Tilt ball switch', 'price': '2.00', 'url': 'https://www.adafruit.com/product/173', 'image_url': 'https://cdn-shop.adafruit.com/970x728/173-00.jpg'}\n",
97
+      "{'title': 'TMP36 - Analog Temperature sensor - TMP36', 'price': '1.50', 'url': 'https://www.adafruit.com/product/165', 'image_url': 'https://cdn-shop.adafruit.com/970x728/165-00.jpg'}\n",
98
+      "{'title': 'Mini 8-Way Rotary Selector Switch - SP8T', 'price': '1.95', 'url': 'https://www.adafruit.com/product/2925', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2925-00.jpg'}\n",
99
+      "{'title': 'Waterproof DS18B20 Digital temperature sensor + extras', 'price': '9.95', 'url': 'https://www.adafruit.com/product/381', 'image_url': 'https://cdn-shop.adafruit.com/970x728/381-00.jpg'}\n",
100
+      "{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
101
+      "{'title': 'Raspberry Pi Zero W Camera Pack - Includes Pi Zero W', 'price': '44.95', 'url': 'https://www.adafruit.com/product/3414', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3414-05.jpg'}\n",
102
+      "{'title': 'Mechanical Decade Counters - Small Size - Pack of 5', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1084', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1084-00.jpg'}\n",
103
+      "{'title': 'DHT11 basic temperature-humidity sensor + extras', 'price': '5.00', 'url': 'https://www.adafruit.com/product/386', 'image_url': 'https://cdn-shop.adafruit.com/970x728/386-00.jpg'}\n",
104
+      "{'title': 'Electroluminescent (EL) Panel - 10cm x 10cm Blue', 'price': '13.95', 'url': 'https://www.adafruit.com/product/624', 'image_url': 'https://cdn-shop.adafruit.com/970x728/624-00.jpg'}\n",
105
+      "{'title': 'IDC Breakout Helper - 2x20 (40 pin)', 'price': '2.25', 'url': 'https://www.adafruit.com/product/2270', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2270-04.jpg'}\n",
106
+      "{'title': 'Peltier Thermo-Electric Cooler Module - 12V 5A', 'price': '11.95', 'url': 'https://www.adafruit.com/product/1330', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1330-02.jpg'}\n",
107
+      "{'title': 'Membrane LED Keypad + extras', 'price': '2.95', 'url': 'https://www.adafruit.com/product/1333', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1333-00.jpg'}\n",
108
+      "{'title': 'Peltier Thermo-Electric Cooler Module - 5V 1A', 'price': '14.95', 'url': 'https://www.adafruit.com/product/1331', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1331-04.jpg'}\n",
109
+      "{'title': 'Adafruit Feather HUZZAH with ESP8266 WiFi', 'price': '16.95', 'url': 'https://www.adafruit.com/product/2821', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2821-01.jpg'}\n",
110
+      "{'title': 'Bi-Color (Red/Green) 24-Bar Bargraph w/I2C Backpack Kit', 'price': '9.95', 'url': 'https://www.adafruit.com/product/1721', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1721-00.jpg'}\n",
111
+      "{'title': '10 Segment Light Bar Graph LED Display - Blue - KWL-R1025BB', 'price': '1.95', 'url': 'https://www.adafruit.com/product/1815', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1815-04.jpg'}\n",
112
+      "{'title': 'Tower Light - Red Alert Light with Buzzer - 12VDC', 'price': '24.95', 'url': 'https://www.adafruit.com/product/2994', 'image_url': 'https://cdn-shop.adafruit.com/product-videos/1024x768/2994-01.jpg'}\n",
113
+      "{'title': 'Miniature 8x8 Red LED Matrix', 'price': '3.95', 'url': 'https://www.adafruit.com/product/454', 'image_url': 'https://cdn-shop.adafruit.com/970x728/454-04.jpg'}\n",
114
+      "{'title': 'Small 1.2\" 8x8 Bi-Color (Red/Green) Square LED Matrix', 'price': '7.95', 'url': 'https://www.adafruit.com/product/458', 'image_url': 'https://cdn-shop.adafruit.com/970x728/458-00.jpg'}\n",
115
+      "{'title': 'Stereo Bonnet Pack for Raspberry Pi Zero W - Includes Pi Zero W', 'price': '34.95', 'url': 'https://www.adafruit.com/product/3412', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3412-01.jpg'}\n",
116
+      "{'title': 'LED Illuminated Pushbutton - 30mm Square', 'price': '3.95', 'url': 'https://www.adafruit.com/product/491', 'image_url': 'https://cdn-shop.adafruit.com/970x728/491-00.jpg'}\n",
117
+      "{'title': 'Adafruit 128x64 OLED Bonnet for Raspberry Pi', 'price': '22.50', 'url': 'https://www.adafruit.com/product/3531', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3531-00.jpg'}\n",
118
+      "{'title': 'Adafruit Sensiron SHT31-D Temperature & Humidity Sensor Breakout', 'price': '13.95', 'url': 'https://www.adafruit.com/product/2857', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2857-04.jpg'}\n",
119
+      "{'title': 'Adafruit PiOLED - 128x32 Monochrome OLED Add-on for Raspberry Pi', 'price': '14.95', 'url': 'https://www.adafruit.com/product/3527', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3527-04.jpg'}\n",
120
+      "{'title': 'Micro B USB Cable with LCD Voltage / Current Display', 'price': '7.50', 'url': 'https://www.adafruit.com/product/3388', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3388-01.jpg'}\n",
121
+      "{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
122
+      "{'title': 'Adafruit I2S 3W Stereo Speaker Bonnet for Raspberry Pi - Mini Kit', 'price': '12.95', 'url': 'https://www.adafruit.com/product/3346', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3346-01.jpg'}\n"
123
+     ]
124
+    }
125
+   ],
126
+   "source": [
127
+    "import requests\n",
128
+    "from bs4 import BeautifulSoup\n",
129
+    "\n",
130
+    "list = []\n",
131
+    "\n",
132
+    "for a in data.url:\n",
133
+    "    result = requests.get(a)\n",
134
+    "    c = result.content\n",
135
+    "    soup = BeautifulSoup(c, \"html5lib\")\n",
136
+    "    title = soup.find_all(\"h1\")[0].string\n",
137
+    "    price = soup.find(itemprop=\"price\").get(\"content\")\n",
138
+    "    image_url = soup.find(itemprop=\"image\").get(\"src\")\n",
139
+    "    obj = {\n",
140
+    "        \"title\": title,\n",
141
+    "        \"price\": price,\n",
142
+    "        \"url\": a,\n",
143
+    "        \"image_url\": image_url\n",
144
+    "    }\n",
145
+    "    list.append(obj)\n",
146
+    "    print(obj)\n"
147
+   ]
148
+  },
149
+  {
150
+   "cell_type": "code",
151
+   "execution_count": 27,
152
+   "metadata": {},
153
+   "outputs": [
154
+    {
155
+     "data": {
156
+      "text/html": [
157
+       "<div>\n",
158
+       "<table border=\"1\" class=\"dataframe\">\n",
159
+       "  <thead>\n",
160
+       "    <tr style=\"text-align: right;\">\n",
161
+       "      <th></th>\n",
162
+       "      <th>0</th>\n",
163
+       "      <th>1</th>\n",
164
+       "      <th>2</th>\n",
165
+       "      <th>3</th>\n",
166
+       "      <th>4</th>\n",
167
+       "      <th>5</th>\n",
168
+       "      <th>6</th>\n",
169
+       "      <th>7</th>\n",
170
+       "      <th>8</th>\n",
171
+       "      <th>9</th>\n",
172
+       "      <th>...</th>\n",
173
+       "      <th>308</th>\n",
174
+       "      <th>309</th>\n",
175
+       "      <th>310</th>\n",
176
+       "      <th>311</th>\n",
177
+       "      <th>312</th>\n",
178
+       "      <th>313</th>\n",
179
+       "      <th>314</th>\n",
180
+       "      <th>315</th>\n",
181
+       "      <th>316</th>\n",
182
+       "      <th>317</th>\n",
183
+       "    </tr>\n",
184
+       "  </thead>\n",
185
+       "  <tbody>\n",
186
+       "    <tr>\n",
187
+       "      <th>0</th>\n",
188
+       "      <td>[{\"title\":</td>\n",
189
+       "      <td>MCP9808 High Accuracy I2C Temperature Sensor B...</td>\n",
190
+       "      <td>price:</td>\n",
191
+       "      <td>4.95,</td>\n",
192
+       "      <td>url:</td>\n",
193
+       "      <td>https://www.adafruit.com/product/1782,</td>\n",
194
+       "      <td>image_url:</td>\n",
195
+       "      <td>https://cdn-shop.adafruit.com/970x728/1782-00....</td>\n",
196
+       "      <td>{\"title\":</td>\n",
197
+       "      <td>Fast Vibration Sensor Switch (Easy to trigger),</td>\n",
198
+       "      <td>...</td>\n",
199
+       "      <td>image_url:</td>\n",
200
+       "      <td>https://cdn-shop.adafruit.com/970x728/2652-00....</td>\n",
201
+       "      <td>{\"title\":</td>\n",
202
+       "      <td>Adafruit I2S 3W Stereo Speaker Bonnet for Rasp...</td>\n",
203
+       "      <td>price:</td>\n",
204
+       "      <td>12.95,</td>\n",
205
+       "      <td>url:</td>\n",
206
+       "      <td>https://www.adafruit.com/product/3346,</td>\n",
207
+       "      <td>image_url:</td>\n",
208
+       "      <td>https://cdn-shop.adafruit.com/970x728/3346-01....</td>\n",
209
+       "    </tr>\n",
210
+       "  </tbody>\n",
211
+       "</table>\n",
212
+       "<p>1 rows × 318 columns</p>\n",
213
+       "</div>"
214
+      ],
215
+      "text/plain": [
216
+       "          0                                                  1       2    \\\n",
217
+       "0  [{\"title\":  MCP9808 High Accuracy I2C Temperature Sensor B...  price:   \n",
218
+       "\n",
219
+       "     3     4                                       5           6    \\\n",
220
+       "0  4.95,  url:  https://www.adafruit.com/product/1782,  image_url:   \n",
221
+       "\n",
222
+       "                                                 7          8    \\\n",
223
+       "0  https://cdn-shop.adafruit.com/970x728/1782-00....  {\"title\":   \n",
224
+       "\n",
225
+       "                                               9    \\\n",
226
+       "0  Fast Vibration Sensor Switch (Easy to trigger),   \n",
227
+       "\n",
228
+       "                         ...                                 308  \\\n",
229
+       "0                        ...                          image_url:   \n",
230
+       "\n",
231
+       "                                                 309        310  \\\n",
232
+       "0  https://cdn-shop.adafruit.com/970x728/2652-00....  {\"title\":   \n",
233
+       "\n",
234
+       "                                                 311     312     313   314  \\\n",
235
+       "0  Adafruit I2S 3W Stereo Speaker Bonnet for Rasp...  price:  12.95,  url:   \n",
236
+       "\n",
237
+       "                                      315         316  \\\n",
238
+       "0  https://www.adafruit.com/product/3346,  image_url:   \n",
239
+       "\n",
240
+       "                                                 317  \n",
241
+       "0  https://cdn-shop.adafruit.com/970x728/3346-01....  \n",
242
+       "\n",
243
+       "[1 rows x 318 columns]"
244
+      ]
245
+     },
246
+     "execution_count": 27,
247
+     "metadata": {},
248
+     "output_type": "execute_result"
249
+    }
250
+   ],
251
+   "source": [
252
+    "# Export Data\n",
253
+    "import json\n",
254
+    "#out = list.to_json(orient='records', lines=True)\n",
255
+    "out = json.dumps(list)\n",
256
+    "with open('adafruit-components.json', 'w') as f:\n",
257
+    "    f.write(out)\n",
258
+    "new_file = pd.read_csv('adafruit-components.json', sep=\" \", header = None)\n",
259
+    "new_file.head()"
260
+   ]
261
+  }
262
+ ],
263
+ "metadata": {
264
+  "kernelspec": {
265
+   "display_name": "Python 3",
266
+   "language": "python",
267
+   "name": "python3"
268
+  },
269
+  "language_info": {
270
+   "codemirror_mode": {
271
+    "name": "ipython",
272
+    "version": 3
273
+   },
274
+   "file_extension": ".py",
275
+   "mimetype": "text/x-python",
276
+   "name": "python",
277
+   "nbconvert_exporter": "python",
278
+   "pygments_lexer": "ipython3",
279
+   "version": "3.6.0"
280
+  }
281
+ },
282
+ "nbformat": 4,
283
+ "nbformat_minor": 2
284
+}

+ 262 - 0
data/.ipynb_checkpoints/housing-prices-sao-paulo-checkpoint.ipynb

@@ -0,0 +1,262 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Housing Prices in São Paulo\n",
8
+    "\n",
9
+    "This notebook gathers information about housing prices and property sizes in the city of São Paulo, Brazil."
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "code",
14
+   "execution_count": 1,
15
+   "metadata": {
16
+    "collapsed": true
17
+   },
18
+   "outputs": [],
19
+   "source": [
20
+    "# Import Libraries\n",
21
+    "import pandas as pd\n",
22
+    "import requests\n",
23
+    "from bs4 import BeautifulSoup\n",
24
+    "import matplotlib.pyplot as plt\n",
25
+    "import matplotlib\n",
26
+    "matplotlib.style.use('ggplot')\n",
27
+    "%matplotlib notebook"
28
+   ]
29
+  },
30
+  {
31
+   "cell_type": "markdown",
32
+   "metadata": {},
33
+   "source": [
34
+    "This dataset is gathered from [Imovel Web](http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-1.html), a Brazilian online real estate portal."
35
+   ]
36
+  },
37
+  {
38
+   "cell_type": "code",
39
+   "execution_count": 2,
40
+   "metadata": {
41
+    "collapsed": true,
42
+    "scrolled": true
43
+   },
44
+   "outputs": [],
45
+   "source": [
46
+    "def getURL(page_number):\n",
47
+    "    base_url = \"http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-\"\n",
48
+    "    end_url = \".html\"\n",
49
+    "    url = base_url + str(page_number) + end_url\n",
50
+    "    return url"
51
+   ]
52
+  },
53
+  {
54
+   "cell_type": "code",
55
+   "execution_count": 3,
56
+   "metadata": {
57
+    "collapsed": true
58
+   },
59
+   "outputs": [],
60
+   "source": [
61
+    "def num(s):\n",
62
+    "    try:\n",
63
+    "        return int(s)\n",
64
+    "    except ValueError:\n",
65
+    "        return float(s)"
66
+   ]
67
+  },
68
+  {
69
+   "cell_type": "code",
70
+   "execution_count": null,
71
+   "metadata": {
72
+    "collapsed": true
73
+   },
74
+   "outputs": [],
75
+   "source": [
76
+    "def grab_data(url, i):\n",
77
+    "    try:\n",
78
+    "        result = requests.get(url)\n",
79
+    "        page = BeautifulSoup(result.content, \"html5lib\")\n",
80
+    "        items = page.find_all('li', class_='post')\n",
81
+    "        for item in items:\n",
82
+    "            title = item.find(\"a\", class_='dl-aviso-link').get('title')\n",
83
+    "            price = item.find(\"span\", class_='precio-valor').string.replace(\"R$\",\"\").replace(\".\",\"\").strip()\n",
84
+    "            size = item.find(\"li\", class_='post-m2totales')\n",
85
+    "            if size is not None:\n",
86
+    "                size = size.text.replace(\"total\",\"\").strip()\n",
87
+    "                #print(size + \" - \" + price + \" - \" + title)\n",
88
+    "                price = num(str(price))/1000\n",
89
+    "                size = num(str(size.replace(\"m²\",\"\")))\n",
90
+    "                df.loc[i] = [size, price]\n",
91
+    "                i = i + 1\n",
92
+    "        return i\n",
93
+    "    except:\n",
94
+    "        print(\"--> ERROR\")\n",
95
+    "        return i"
96
+   ]
97
+  },
98
+  {
99
+   "cell_type": "code",
100
+   "execution_count": null,
101
+   "metadata": {
102
+    "scrolled": false
103
+   },
104
+   "outputs": [
105
+    {
106
+     "name": "stdout",
107
+     "output_type": "stream",
108
+     "text": [
109
+      "1 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-1.html\n",
110
+      "2 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-2.html\n",
111
+      "3 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-3.html\n",
112
+      "4 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-4.html\n",
113
+      "5 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-5.html\n",
114
+      "6 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-6.html\n",
115
+      "7 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-7.html\n",
116
+      "8 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-8.html\n",
117
+      "9 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-9.html\n",
118
+      "10 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-10.html\n",
119
+      "11 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-11.html\n",
120
+      "12 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-12.html\n",
121
+      "13 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-13.html\n",
122
+      "14 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-14.html\n",
123
+      "15 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-15.html\n",
124
+      "16 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-16.html\n",
125
+      "--> ERROR\n",
126
+      "17 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-17.html\n",
127
+      "18 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-18.html\n",
128
+      "19 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-19.html\n",
129
+      "20 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-20.html\n",
130
+      "21 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-21.html\n",
131
+      "22 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-22.html\n",
132
+      "23 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-23.html\n",
133
+      "24 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-24.html\n",
134
+      "25 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-25.html\n",
135
+      "26 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-26.html\n",
136
+      "27 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-27.html\n",
137
+      "28 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-28.html\n",
138
+      "29 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-29.html\n",
139
+      "--> ERROR\n",
140
+      "30 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-30.html\n",
141
+      "31 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-31.html\n",
142
+      "32 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-32.html\n",
143
+      "33 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-33.html\n",
144
+      "34 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-34.html\n",
145
+      "35 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-35.html\n",
146
+      "36 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-36.html\n",
147
+      "37 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-37.html\n",
148
+      "38 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-38.html\n",
149
+      "39 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-39.html\n",
150
+      "40 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-40.html\n",
151
+      "41 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-41.html\n",
152
+      "42 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-42.html\n",
153
+      "43 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-43.html\n",
154
+      "44 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-44.html\n",
155
+      "45 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-45.html\n",
156
+      "--> ERROR\n",
157
+      "46 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-46.html\n",
158
+      "47 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-47.html\n",
159
+      "48 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-48.html\n",
160
+      "49 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-49.html\n",
161
+      "50 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-50.html\n",
162
+      "51 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-51.html\n",
163
+      "52 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-52.html\n",
164
+      "53 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-53.html\n",
165
+      "54 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-54.html\n",
166
+      "55 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-55.html\n",
167
+      "56 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-56.html\n",
168
+      "57 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-57.html\n",
169
+      "58 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-58.html\n",
170
+      "59 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-59.html\n",
171
+      "60 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-60.html\n",
172
+      "61 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-61.html\n",
173
+      "62 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-62.html\n",
174
+      "63 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-63.html\n",
175
+      "--> ERROR\n",
176
+      "64 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-64.html\n",
177
+      "65 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-65.html\n",
178
+      "66 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-66.html\n",
179
+      "67 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-67.html\n",
180
+      "68 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-68.html\n",
181
+      "69 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-69.html\n",
182
+      "70 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-70.html\n",
183
+      "71 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-71.html\n",
184
+      "72 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-72.html\n",
185
+      "73 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-73.html\n",
186
+      "74 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-74.html\n",
187
+      "75 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-75.html\n",
188
+      "76 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-76.html\n",
189
+      "77 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-77.html\n",
190
+      "78 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-78.html\n",
191
+      "79 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-79.html\n",
192
+      "80 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-80.html\n",
193
+      "81 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-81.html\n",
194
+      "82 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-82.html\n",
195
+      "83 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-83.html\n",
196
+      "84 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-84.html\n",
197
+      "85 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-85.html\n",
198
+      "86 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-86.html\n",
199
+      "--> ERROR\n",
200
+      "87 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-87.html\n",
201
+      "88 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-88.html\n",
202
+      "--> ERROR\n",
203
+      "89 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-89.html\n",
204
+      "90 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-90.html\n"
205
+     ]
206
+    }
207
+   ],
208
+   "source": [
209
+    "df = pd.DataFrame([], columns=('size', 'price'))\n",
210
+    "i = 0\n",
211
+    "for page_number in  range(1,100):\n",
212
+    "    url = getURL(page_number)\n",
213
+    "    print(str(page_number) + \" - \" + url)\n",
214
+    "    i = grab_data(url, i)\n",
215
+    "df.tail()       "
216
+   ]
217
+  },
218
+  {
219
+   "cell_type": "code",
220
+   "execution_count": null,
221
+   "metadata": {
222
+    "scrolled": false
223
+   },
224
+   "outputs": [],
225
+   "source": [
226
+    "df.plot(x=\"size\",  y=\"price\", kind='scatter', color='DarkBlue', xlim=(0, 350), ylim=(0, 2500000))\n",
227
+    "plt.xlabel(\"Size (m²)\")\n",
228
+    "plt.ylabel(\"Price (R$)\")"
229
+   ]
230
+  },
231
+  {
232
+   "cell_type": "code",
233
+   "execution_count": null,
234
+   "metadata": {
235
+    "collapsed": true
236
+   },
237
+   "outputs": [],
238
+   "source": []
239
+  }
240
+ ],
241
+ "metadata": {
242
+  "kernelspec": {
243
+   "display_name": "Python 3",
244
+   "language": "python",
245
+   "name": "python3"
246
+  },
247
+  "language_info": {
248
+   "codemirror_mode": {
249
+    "name": "ipython",
250
+    "version": 3
251
+   },
252
+   "file_extension": ".py",
253
+   "mimetype": "text/x-python",
254
+   "name": "python",
255
+   "nbconvert_exporter": "python",
256
+   "pygments_lexer": "ipython3",
257
+   "version": "3.6.0"
258
+  }
259
+ },
260
+ "nbformat": 4,
261
+ "nbformat_minor": 2
262
+}

+ 1090 - 0
data/housing-prices-sao-paulo.ipynb

@@ -0,0 +1,1210 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Housing Prices in São Paulo\n",
8
+    "\n",
9
+    "This notebook gathers the prices and sizes of properties for sale in the city of São Paulo, Brazil."
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "code",
14
+   "execution_count": 1,
15
+   "metadata": {
16
+    "collapsed": true
17
+   },
18
+   "outputs": [],
19
+   "source": [
20
+    "# Import Libraries\n",
21
+    "import pandas as pd\n",
22
+    "import requests\n",
23
+    "from bs4 import BeautifulSoup\n",
24
+    "import matplotlib.pyplot as plt\n",
25
+    "import matplotlib\n",
26
+    "matplotlib.style.use('ggplot')\n",
27
+    "%matplotlib notebook"
28
+   ]
29
+  },
30
+  {
31
+   "cell_type": "markdown",
32
+   "metadata": {},
33
+   "source": [
34
+    "This dataset is gathered from [Imovel Web](http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-1.html), a Brazilian online real estate portal. The function below builds the URL of a given results page; it is called once in each loop iteration."
35
+   ]
36
+  },
37
+  {
38
+   "cell_type": "code",
39
+   "execution_count": 2,
40
+   "metadata": {
41
+    "collapsed": true,
42
+    "scrolled": true
43
+   },
44
+   "outputs": [],
45
+   "source": [
46
+    "def getURL(page_number):\n",
47
+    "    base_url = \"http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-\"\n",
48
+    "    end_url = \".html\"\n",
49
+    "    url = base_url + str(page_number) + end_url\n",
50
+    "    return url"
51
+   ]
52
+  },
53
+  {
54
+   "cell_type": "code",
55
+   "execution_count": 3,
56
+   "metadata": {
57
+    "collapsed": true
58
+   },
59
+   "outputs": [],
60
+   "source": [
61
+    "def num(s):\n",
62
+    "    try:\n",
63
+    "        return int(s)\n",
64
+    "    except ValueError:\n",
65
+    "        return float(s)"
66
+   ]
67
+  },
68
+  {
69
+   "cell_type": "markdown",
70
+   "metadata": {},
71
+   "source": [
72
+    "The function below requests a URL and passes the page to [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/), which in turn scrapes each data item from this page. Each item that lists a size is added as a new row to a [pandas](https://pandas.pydata.org) DataFrame for later use."
73
+   ]
74
+  },
75
+  {
76
+   "cell_type": "code",
77
+   "execution_count": 4,
78
+   "metadata": {
79
+    "collapsed": true
80
+   },
81
+   "outputs": [],
82
+   "source": [
83
+    "def grab_data(url, i):\n",
84
+    "    try:\n",
85
+    "        result = requests.get(url)\n",
86
+    "        page = BeautifulSoup(result.content, \"html5lib\")\n",
87
+    "        items = page.find_all('li', class_='post')\n",
88
+    "        for item in items:\n",
89
+    "            title = item.find(\"a\", class_='dl-aviso-link').get('title')\n",
90
+    "            price = item.find(\"span\", class_='precio-valor').string.replace(\"R$\",\"\").replace(\".\",\"\").strip()\n",
91
+    "            size = item.find(\"li\", class_='post-m2totales')\n",
92
+    "            if size is not None:\n",
93
+    "                size = size.text.replace(\"total\",\"\").strip()\n",
94
+    "                #print(size + \" - \" + price + \" - \" + title)\n",
95
+    "                price = num(str(price))/1000\n",
96
+    "                size = num(str(size.replace(\"m²\",\"\")))\n",
97
+    "                df.loc[i] = [size, price]\n",
98
+    "                i = i + 1\n",
99
+    "        return i\n",
100
+    "    except:\n",
101
+    "        print(\"--> ERROR\")\n",
102
+    "        return i"
103
+   ]
104
+  },
105
+  {
106
+   "cell_type": "markdown",
107
+   "metadata": {},
108
+   "source": [
109
+    "Below is the actual program loop. It will grab data from *n* number of pages using the ```grab_data()``` function. While this is happening, the program prints the current URL that is being scraped or prints an error message. If an error occurs, the program will continue scraping from the next link."
110
+   ]
111
+  },
112
+  {
113
+   "cell_type": "code",
114
+   "execution_count": 5,
115
+   "metadata": {
116
+    "scrolled": true
117
+   },
118
+   "outputs": [
119
+    {
120
+     "name": "stdout",
121
+     "output_type": "stream",
122
+     "text": [
123
+      "1 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-1.html\n",
124
+      "2 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-2.html\n",
125
+      "3 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-3.html\n",
126
+      "4 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-4.html\n",
127
+      "5 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-5.html\n",
128
+      "6 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-6.html\n",
129
+      "7 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-7.html\n",
130
+      "8 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-8.html\n",
131
+      "9 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-9.html\n",
132
+      "10 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-10.html\n",
133
+      "11 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-11.html\n",
134
+      "12 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-12.html\n",
135
+      "13 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-13.html\n",
136
+      "14 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-14.html\n",
137
+      "15 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-15.html\n",
138
+      "16 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-16.html\n",
139
+      "--> ERROR\n",
140
+      "17 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-17.html\n",
141
+      "18 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-18.html\n",
142
+      "19 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-19.html\n",
143
+      "20 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-20.html\n",
144
+      "21 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-21.html\n",
145
+      "22 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-22.html\n",
146
+      "23 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-23.html\n",
147
+      "24 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-24.html\n",
148
+      "25 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-25.html\n",
149
+      "26 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-26.html\n",
150
+      "27 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-27.html\n",
151
+      "28 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-28.html\n",
152
+      "29 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-29.html\n",
153
+      "--> ERROR\n",
154
+      "30 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-30.html\n",
155
+      "31 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-31.html\n",
156
+      "32 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-32.html\n",
157
+      "33 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-33.html\n",
158
+      "34 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-34.html\n",
159
+      "35 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-35.html\n",
160
+      "36 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-36.html\n",
161
+      "37 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-37.html\n",
162
+      "38 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-38.html\n",
163
+      "39 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-39.html\n",
164
+      "40 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-40.html\n",
165
+      "41 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-41.html\n",
166
+      "42 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-42.html\n",
167
+      "43 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-43.html\n",
168
+      "44 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-44.html\n",
169
+      "45 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-45.html\n",
170
+      "--> ERROR\n",
171
+      "46 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-46.html\n",
172
+      "47 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-47.html\n",
173
+      "48 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-48.html\n",
174
+      "49 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-49.html\n",
175
+      "50 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-50.html\n",
176
+      "51 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-51.html\n",
177
+      "52 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-52.html\n",
178
+      "53 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-53.html\n",
179
+      "54 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-54.html\n",
180
+      "55 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-55.html\n",
181
+      "56 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-56.html\n",
182
+      "57 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-57.html\n",
183
+      "58 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-58.html\n",
184
+      "59 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-59.html\n",
185
+      "60 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-60.html\n",
186
+      "61 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-61.html\n",
187
+      "62 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-62.html\n",
188
+      "63 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-63.html\n",
189
+      "--> ERROR\n",
190
+      "64 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-64.html\n",
191
+      "65 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-65.html\n",
192
+      "66 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-66.html\n",
193
+      "67 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-67.html\n",
194
+      "68 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-68.html\n",
195
+      "69 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-69.html\n",
196
+      "70 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-70.html\n",
197
+      "71 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-71.html\n",
198
+      "72 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-72.html\n",
199
+      "73 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-73.html\n",
200
+      "74 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-74.html\n",
201
+      "75 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-75.html\n",
202
+      "76 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-76.html\n",
203
+      "77 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-77.html\n",
204
+      "78 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-78.html\n",
205
+      "79 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-79.html\n",
206
+      "80 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-80.html\n",
207
+      "81 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-81.html\n",
208
+      "82 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-82.html\n",
209
+      "83 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-83.html\n",
210
+      "84 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-84.html\n",
211
+      "85 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-85.html\n",
212
+      "86 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-86.html\n",
213
+      "--> ERROR\n",
214
+      "87 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-87.html\n",
215
+      "88 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-88.html\n",
216
+      "--> ERROR\n",
217
+      "89 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-89.html\n",
218
+      "90 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-90.html\n",
219
+      "91 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-91.html\n",
220
+      "--> ERROR\n",
221
+      "92 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-92.html\n",
222
+      "93 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-93.html\n",
223
+      "94 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-94.html\n",
224
+      "95 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-95.html\n",
225
+      "96 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-96.html\n",
226
+      "97 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-97.html\n",
227
+      "98 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-98.html\n",
228
+      "99 - http://www.imovelweb.com.br/imoveis-venda-sao-paulo-sp-pagina-99.html\n"
229
+     ]
230
+    },
231
+    {
232
+     "data": {
233
+      "text/html": [
234
+       "<div>\n",
235
+       "<table border=\"1\" class=\"dataframe\">\n",
236
+       "  <thead>\n",
237
+       "    <tr style=\"text-align: right;\">\n",
238
+       "      <th></th>\n",
239
+       "      <th>size</th>\n",
240
+       "      <th>price</th>\n",
241
+       "    </tr>\n",
242
+       "  </thead>\n",
243
+       "  <tbody>\n",
244
+       "    <tr>\n",
245
+       "      <th>1084</th>\n",
246
+       "      <td>69.0</td>\n",
247
+       "      <td>480.0</td>\n",
248
+       "    </tr>\n",
249
+       "    <tr>\n",
250
+       "      <th>1085</th>\n",
251
+       "      <td>103.0</td>\n",
252
+       "      <td>945.0</td>\n",
253
+       "    </tr>\n",
254
+       "    <tr>\n",
255
+       "      <th>1086</th>\n",
256
+       "      <td>56.0</td>\n",
257
+       "      <td>650.0</td>\n",
258
+       "    </tr>\n",
259
+       "    <tr>\n",
260
+       "      <th>1087</th>\n",
261
+       "      <td>81.0</td>\n",
262
+       "      <td>800.0</td>\n",
263
+       "    </tr>\n",
264
+       "    <tr>\n",
265
+       "      <th>1088</th>\n",
266
+       "      <td>70.0</td>\n",
267
+       "      <td>35.0</td>\n",
268
+       "    </tr>\n",
269
+       "  </tbody>\n",
270
+       "</table>\n",
271
+       "</div>"
272
+      ],
273
+      "text/plain": [
274
+       "       size  price\n",
275
+       "1084   69.0  480.0\n",
276
+       "1085  103.0  945.0\n",
277
+       "1086   56.0  650.0\n",
278
+       "1087   81.0  800.0\n",
279
+       "1088   70.0   35.0"
280
+      ]
281
+     },
282
+     "execution_count": 5,
283
+     "metadata": {},
284
+     "output_type": "execute_result"
285
+    }
286
+   ],
287
+   "source": [
288
+    "df = pd.DataFrame([], columns=('size', 'price'))\n",
289
+    "i = 0\n",
290
+    "for page_number in  range(1,100):\n",
291
+    "    url = getURL(page_number)\n",
292
+    "    print(str(page_number) + \" - \" + url)\n",
293
+    "    i = grab_data(url, i)\n",
294
+    "df.tail()       "
295
+   ]
296
+  },
297
+  {
298
+   "cell_type": "markdown",
299
+   "metadata": {},
300
+   "source": [
301
+    "The next snippet creates a plot with the data gathered in the previous step."
302
+   ]
303
+  },
304
+  {
305
+   "cell_type": "code",
306
+   "execution_count": 28,
307
+   "metadata": {
308
+    "scrolled": false
309
+   },
310
+   "outputs": [
311
+    {
312
+     "data": {
313
+      "application/javascript": [
314
+       "/* Put everything inside the global mpl namespace */\n",
315
+       "window.mpl = {};\n",
316
+       "\n",
317
+       "\n",
318
+       "mpl.get_websocket_type = function() {\n",
319
+       "    if (typeof(WebSocket) !== 'undefined') {\n",
320
+       "        return WebSocket;\n",
321
+       "    } else if (typeof(MozWebSocket) !== 'undefined') {\n",
322
+       "        return MozWebSocket;\n",
323
+       "    } else {\n",
324
+       "        alert('Your browser does not have WebSocket support.' +\n",
325
+       "              'Please try Chrome, Safari or Firefox ≥ 6. ' +\n",
326
+       "              'Firefox 4 and 5 are also supported but you ' +\n",
327
+       "              'have to enable WebSockets in about:config.');\n",
328
+       "    };\n",
329
+       "}\n",
330
+       "\n",
331
+       "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n",
332
+       "    this.id = figure_id;\n",
333
+       "\n",
334
+       "    this.ws = websocket;\n",
335
+       "\n",
336
+       "    this.supports_binary = (this.ws.binaryType != undefined);\n",
337
+       "\n",
338
+       "    if (!this.supports_binary) {\n",
339
+       "        var warnings = document.getElementById(\"mpl-warnings\");\n",
340
+       "        if (warnings) {\n",
341
+       "            warnings.style.display = 'block';\n",
342
+       "            warnings.textContent = (\n",
343
+       "                \"This browser does not support binary websocket messages. \" +\n",
344
+       "                    \"Performance may be slow.\");\n",
345
+       "        }\n",
346
+       "    }\n",
347
+       "\n",
348
+       "    this.imageObj = new Image();\n",
349
+       "\n",
350
+       "    this.context = undefined;\n",
351
+       "    this.message = undefined;\n",
352
+       "    this.canvas = undefined;\n",
353
+       "    this.rubberband_canvas = undefined;\n",
354
+       "    this.rubberband_context = undefined;\n",
355
+       "    this.format_dropdown = undefined;\n",
356
+       "\n",
357
+       "    this.image_mode = 'full';\n",
358
+       "\n",
359
+       "    this.root = $('<div/>');\n",
360
+       "    this._root_extra_style(this.root)\n",
361
+       "    this.root.attr('style', 'display: inline-block');\n",
362
+       "\n",
363
+       "    $(parent_element).append(this.root);\n",
364
+       "\n",
365
+       "    this._init_header(this);\n",
366
+       "    this._init_canvas(this);\n",
367
+       "    this._init_toolbar(this);\n",
368
+       "\n",
369
+       "    var fig = this;\n",
370
+       "\n",
371
+       "    this.waiting = false;\n",
372
+       "\n",
373
+       "    this.ws.onopen =  function () {\n",
374
+       "            fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n",
375
+       "            fig.send_message(\"send_image_mode\", {});\n",
376
+       "            if (mpl.ratio != 1) {\n",
377
+       "                fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n",
378
+       "            }\n",
379
+       "            fig.send_message(\"refresh\", {});\n",
380
+       "        }\n",
381
+       "\n",
382
+       "    this.imageObj.onload = function() {\n",
383
+       "            if (fig.image_mode == 'full') {\n",
384
+       "                // Full images could contain transparency (where diff images\n",
385
+       "                // almost always do), so we need to clear the canvas so that\n",
386
+       "                // there is no ghosting.\n",
387
+       "                fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n",
388
+       "            }\n",
389
+       "            fig.context.drawImage(fig.imageObj, 0, 0);\n",
390
+       "        };\n",
391
+       "\n",
392
+       "    this.imageObj.onunload = function() {\n",
393
+       "        this.ws.close();\n",
394
+       "    }\n",
395
+       "\n",
396
+       "    this.ws.onmessage = this._make_on_message_function(this);\n",
397
+       "\n",
398
+       "    this.ondownload = ondownload;\n",
399
+       "}\n",
400
+       "\n",
401
+       "mpl.figure.prototype._init_header = function() {\n",
402
+       "    var titlebar = $(\n",
403
+       "        '<div class=\"ui-dialog-titlebar ui-widget-header ui-corner-all ' +\n",
404
+       "        'ui-helper-clearfix\"/>');\n",
405
+       "    var titletext = $(\n",
406
+       "        '<div class=\"ui-dialog-title\" style=\"width: 100%; ' +\n",
407
+       "        'text-align: center; padding: 3px;\"/>');\n",
408
+       "    titlebar.append(titletext)\n",
409
+       "    this.root.append(titlebar);\n",
410
+       "    this.header = titletext[0];\n",
411
+       "}\n",
412
+       "\n",
413
+       "\n",
414
+       "\n",
415
+       "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n",
416
+       "\n",
417
+       "}\n",
418
+       "\n",
419
+       "\n",
420
+       "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n",
421
+       "\n",
422
+       "}\n",
423
+       "\n",
424
+       "mpl.figure.prototype._init_canvas = function() {\n",
425
+       "    var fig = this;\n",
426
+       "\n",
427
+       "    var canvas_div = $('<div/>');\n",
428
+       "\n",
429
+       "    canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n",
430
+       "\n",
431
+       "    function canvas_keyboard_event(event) {\n",
432
+       "        return fig.key_event(event, event['data']);\n",
433
+       "    }\n",
434
+       "\n",
435
+       "    canvas_div.keydown('key_press', canvas_keyboard_event);\n",
436
+       "    canvas_div.keyup('key_release', canvas_keyboard_event);\n",
437
+       "    this.canvas_div = canvas_div\n",
438
+       "    this._canvas_extra_style(canvas_div)\n",
439
+       "    this.root.append(canvas_div);\n",
440
+       "\n",
441
+       "    var canvas = $('<canvas/>');\n",
442
+       "    canvas.addClass('mpl-canvas');\n",
443
+       "    canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n",
444
+       "\n",
445
+       "    this.canvas = canvas[0];\n",
446
+       "    this.context = canvas[0].getContext(\"2d\");\n",
447
+       "\n",
448
+       "    var backingStore = this.context.backingStorePixelRatio ||\n",
449
+       "\tthis.context.webkitBackingStorePixelRatio ||\n",
450
+       "\tthis.context.mozBackingStorePixelRatio ||\n",
451
+       "\tthis.context.msBackingStorePixelRatio ||\n",
452
+       "\tthis.context.oBackingStorePixelRatio ||\n",
453
+       "\tthis.context.backingStorePixelRatio || 1;\n",
454
+       "\n",
455
+       "    mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n",
456
+       "\n",
457
+       "    var rubberband = $('<canvas/>');\n",
458
+       "    rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n",
459
+       "\n",
460
+       "    var pass_mouse_events = true;\n",
461
+       "\n",
462
+       "    canvas_div.resizable({\n",
463
+       "        start: function(event, ui) {\n",
464
+       "            pass_mouse_events = false;\n",
465
+       "        },\n",
466
+       "        resize: function(event, ui) {\n",
467
+       "            fig.request_resize(ui.size.width, ui.size.height);\n",
468
+       "        },\n",
469
+       "        stop: function(event, ui) {\n",
470
+       "            pass_mouse_events = true;\n",
471
+       "            fig.request_resize(ui.size.width, ui.size.height);\n",
472
+       "        },\n",
473
+       "    });\n",
474
+       "\n",
475
+       "    function mouse_event_fn(event) {\n",
476
+       "        if (pass_mouse_events)\n",
477
+       "            return fig.mouse_event(event, event['data']);\n",
478
+       "    }\n",
479
+       "\n",
480
+       "    rubberband.mousedown('button_press', mouse_event_fn);\n",
481
+       "    rubberband.mouseup('button_release', mouse_event_fn);\n",
482
+       "    // Throttle sequential mouse events to 1 every 20ms.\n",
483
+       "    rubberband.mousemove('motion_notify', mouse_event_fn);\n",
484
+       "\n",
485
+       "    rubberband.mouseenter('figure_enter', mouse_event_fn);\n",
486
+       "    rubberband.mouseleave('figure_leave', mouse_event_fn);\n",
487
+       "\n",
488
+       "    canvas_div.on(\"wheel\", function (event) {\n",
489
+       "        event = event.originalEvent;\n",
490
+       "        event['data'] = 'scroll'\n",
491
+       "        if (event.deltaY < 0) {\n",
492
+       "            event.step = 1;\n",
493
+       "        } else {\n",
494
+       "            event.step = -1;\n",
495
+       "        }\n",
496
+       "        mouse_event_fn(event);\n",
497
+       "    });\n",
498
+       "\n",
499
+       "    canvas_div.append(canvas);\n",
500
+       "    canvas_div.append(rubberband);\n",
501
+       "\n",
502
+       "    this.rubberband = rubberband;\n",
503
+       "    this.rubberband_canvas = rubberband[0];\n",
504
+       "    this.rubberband_context = rubberband[0].getContext(\"2d\");\n",
505
+       "    this.rubberband_context.strokeStyle = \"#000000\";\n",
506
+       "\n",
507
+       "    this._resize_canvas = function(width, height) {\n",
508
+       "        // Keep the size of the canvas, canvas container, and rubber band\n",
509
+       "        // canvas in synch.\n",
510
+       "        canvas_div.css('width', width)\n",
511
+       "        canvas_div.css('height', height)\n",
512
+       "\n",
513
+       "        canvas.attr('width', width * mpl.ratio);\n",
514
+       "        canvas.attr('height', height * mpl.ratio);\n",
515
+       "        canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n",
516
+       "\n",
517
+       "        rubberband.attr('width', width);\n",
518
+       "        rubberband.attr('height', height);\n",
519
+       "    }\n",
520
+       "\n",
521
+       "    // Set the figure to an initial 600x600px, this will subsequently be updated\n",
522
+       "    // upon first draw.\n",
523
+       "    this._resize_canvas(600, 600);\n",
524
+       "\n",
525
+       "    // Disable right mouse context menu.\n",
526
+       "    $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n",
527
+       "        return false;\n",
528
+       "    });\n",
529
+       "\n",
530
+       "    function set_focus () {\n",
531
+       "        canvas.focus();\n",
532
+       "        canvas_div.focus();\n",
533
+       "    }\n",
534
+       "\n",
535
+       "    window.setTimeout(set_focus, 100);\n",
536
+       "}\n",
537
+       "\n",
538
+       "mpl.figure.prototype._init_toolbar = function() {\n",
539
+       "    var fig = this;\n",
540
+       "\n",
541
+       "    var nav_element = $('<div/>')\n",
542
+       "    nav_element.attr('style', 'width: 100%');\n",
543
+       "    this.root.append(nav_element);\n",
544
+       "\n",
545
+       "    // Define a callback function for later on.\n",
546
+       "    function toolbar_event(event) {\n",
547
+       "        return fig.toolbar_button_onclick(event['data']);\n",
548
+       "    }\n",
549
+       "    function toolbar_mouse_event(event) {\n",
550
+       "        return fig.toolbar_button_onmouseover(event['data']);\n",
551
+       "    }\n",
552
+       "\n",
553
+       "    for(var toolbar_ind in mpl.toolbar_items) {\n",
554
+       "        var name = mpl.toolbar_items[toolbar_ind][0];\n",
555
+       "        var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
556
+       "        var image = mpl.toolbar_items[toolbar_ind][2];\n",
557
+       "        var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
558
+       "\n",
559
+       "        if (!name) {\n",
560
+       "            // put a spacer in here.\n",
561
+       "            continue;\n",
562
+       "        }\n",
563
+       "        var button = $('<button/>');\n",
564
+       "        button.addClass('ui-button ui-widget ui-state-default ui-corner-all ' +\n",
565
+       "                        'ui-button-icon-only');\n",
566
+       "        button.attr('role', 'button');\n",
567
+       "        button.attr('aria-disabled', 'false');\n",
568
+       "        button.click(method_name, toolbar_event);\n",
569
+       "        button.mouseover(tooltip, toolbar_mouse_event);\n",
570
+       "\n",
571
+       "        var icon_img = $('<span/>');\n",
572
+       "        icon_img.addClass('ui-button-icon-primary ui-icon');\n",
573
+       "        icon_img.addClass(image);\n",
574
+       "        icon_img.addClass('ui-corner-all');\n",
575
+       "\n",
576
+       "        var tooltip_span = $('<span/>');\n",
577
+       "        tooltip_span.addClass('ui-button-text');\n",
578
+       "        tooltip_span.html(tooltip);\n",
579
+       "\n",
580
+       "        button.append(icon_img);\n",
581
+       "        button.append(tooltip_span);\n",
582
+       "\n",
583
+       "        nav_element.append(button);\n",
584
+       "    }\n",
585
+       "\n",
586
+       "    var fmt_picker_span = $('<span/>');\n",
587
+       "\n",
588
+       "    var fmt_picker = $('<select/>');\n",
589
+       "    fmt_picker.addClass('mpl-toolbar-option ui-widget ui-widget-content');\n",
590
+       "    fmt_picker_span.append(fmt_picker);\n",
591
+       "    nav_element.append(fmt_picker_span);\n",
592
+       "    this.format_dropdown = fmt_picker[0];\n",
593
+       "\n",
594
+       "    for (var ind in mpl.extensions) {\n",
595
+       "        var fmt = mpl.extensions[ind];\n",
596
+       "        var option = $(\n",
597
+       "            '<option/>', {selected: fmt === mpl.default_extension}).html(fmt);\n",
598
+       "        fmt_picker.append(option)\n",
599
+       "    }\n",
600
+       "\n",
601
+       "    // Add hover states to the ui-buttons\n",
602
+       "    $( \".ui-button\" ).hover(\n",
603
+       "        function() { $(this).addClass(\"ui-state-hover\");},\n",
604
+       "        function() { $(this).removeClass(\"ui-state-hover\");}\n",
605
+       "    );\n",
606
+       "\n",
607
+       "    var status_bar = $('<span class=\"mpl-message\"/>');\n",
608
+       "    nav_element.append(status_bar);\n",
609
+       "    this.message = status_bar[0];\n",
610
+       "}\n",
611
+       "\n",
612
+       "mpl.figure.prototype.request_resize = function(x_pixels, y_pixels) {\n",
613
+       "    // Request matplotlib to resize the figure. Matplotlib will then trigger a resize in the client,\n",
614
+       "    // which will in turn request a refresh of the image.\n",
615
+       "    this.send_message('resize', {'width': x_pixels, 'height': y_pixels});\n",
616
+       "}\n",
617
+       "\n",
618
+       "mpl.figure.prototype.send_message = function(type, properties) {\n",
619
+       "    properties['type'] = type;\n",
620
+       "    properties['figure_id'] = this.id;\n",
621
+       "    this.ws.send(JSON.stringify(properties));\n",
622
+       "}\n",
623
+       "\n",
624
+       "mpl.figure.prototype.send_draw_message = function() {\n",
625
+       "    if (!this.waiting) {\n",
626
+       "        this.waiting = true;\n",
627
+       "        this.ws.send(JSON.stringify({type: \"draw\", figure_id: this.id}));\n",
628
+       "    }\n",
629
+       "}\n",
630
+       "\n",
631
+       "\n",
632
+       "mpl.figure.prototype.handle_save = function(fig, msg) {\n",
633
+       "    var format_dropdown = fig.format_dropdown;\n",
634
+       "    var format = format_dropdown.options[format_dropdown.selectedIndex].value;\n",
635
+       "    fig.ondownload(fig, format);\n",
636
+       "}\n",
637
+       "\n",
638
+       "\n",
639
+       "mpl.figure.prototype.handle_resize = function(fig, msg) {\n",
640
+       "    var size = msg['size'];\n",
641
+       "    if (size[0] != fig.canvas.width || size[1] != fig.canvas.height) {\n",
642
+       "        fig._resize_canvas(size[0], size[1]);\n",
643
+       "        fig.send_message(\"refresh\", {});\n",
644
+       "    };\n",
645
+       "}\n",
646
+       "\n",
647
+       "mpl.figure.prototype.handle_rubberband = function(fig, msg) {\n",
648
+       "    var x0 = msg['x0'] / mpl.ratio;\n",
649
+       "    var y0 = (fig.canvas.height - msg['y0']) / mpl.ratio;\n",
650
+       "    var x1 = msg['x1'] / mpl.ratio;\n",
651
+       "    var y1 = (fig.canvas.height - msg['y1']) / mpl.ratio;\n",
652
+       "    x0 = Math.floor(x0) + 0.5;\n",
653
+       "    y0 = Math.floor(y0) + 0.5;\n",
654
+       "    x1 = Math.floor(x1) + 0.5;\n",
655
+       "    y1 = Math.floor(y1) + 0.5;\n",
656
+       "    var min_x = Math.min(x0, x1);\n",
657
+       "    var min_y = Math.min(y0, y1);\n",
658
+       "    var width = Math.abs(x1 - x0);\n",
659
+       "    var height = Math.abs(y1 - y0);\n",
660
+       "\n",
661
+       "    fig.rubberband_context.clearRect(\n",
662
+       "        0, 0, fig.canvas.width, fig.canvas.height);\n",
663
+       "\n",
664
+       "    fig.rubberband_context.strokeRect(min_x, min_y, width, height);\n",
665
+       "}\n",
666
+       "\n",
667
+       "mpl.figure.prototype.handle_figure_label = function(fig, msg) {\n",
668
+       "    // Updates the figure title.\n",
669
+       "    fig.header.textContent = msg['label'];\n",
670
+       "}\n",
671
+       "\n",
672
+       "mpl.figure.prototype.handle_cursor = function(fig, msg) {\n",
673
+       "    var cursor = msg['cursor'];\n",
674
+       "    switch(cursor)\n",
675
+       "    {\n",
676
+       "    case 0:\n",
677
+       "        cursor = 'pointer';\n",
678
+       "        break;\n",
679
+       "    case 1:\n",
680
+       "        cursor = 'default';\n",
681
+       "        break;\n",
682
+       "    case 2:\n",
683
+       "        cursor = 'crosshair';\n",
684
+       "        break;\n",
685
+       "    case 3:\n",
686
+       "        cursor = 'move';\n",
687
+       "        break;\n",
688
+       "    }\n",
689
+       "    fig.rubberband_canvas.style.cursor = cursor;\n",
690
+       "}\n",
691
+       "\n",
692
+       "mpl.figure.prototype.handle_message = function(fig, msg) {\n",
693
+       "    fig.message.textContent = msg['message'];\n",
694
+       "}\n",
695
+       "\n",
696
+       "mpl.figure.prototype.handle_draw = function(fig, msg) {\n",
697
+       "    // Request the server to send over a new figure.\n",
698
+       "    fig.send_draw_message();\n",
699
+       "}\n",
700
+       "\n",
701
+       "mpl.figure.prototype.handle_image_mode = function(fig, msg) {\n",
702
+       "    fig.image_mode = msg['mode'];\n",
703
+       "}\n",
704
+       "\n",
705
+       "mpl.figure.prototype.updated_canvas_event = function() {\n",
706
+       "    // Called whenever the canvas gets updated.\n",
707
+       "    this.send_message(\"ack\", {});\n",
708
+       "}\n",
709
+       "\n",
710
+       "// A function to construct a web socket function for onmessage handling.\n",
711
+       "// Called in the figure constructor.\n",
712
+       "mpl.figure.prototype._make_on_message_function = function(fig) {\n",
713
+       "    return function socket_on_message(evt) {\n",
714
+       "        if (evt.data instanceof Blob) {\n",
715
+       "            /* FIXME: We get \"Resource interpreted as Image but\n",
716
+       "             * transferred with MIME type text/plain:\" errors on\n",
717
+       "             * Chrome.  But how to set the MIME type?  It doesn't seem\n",
718
+       "             * to be part of the websocket stream */\n",
719
+       "            evt.data.type = \"image/png\";\n",
720
+       "\n",
721
+       "            /* Free the memory for the previous frames */\n",
722
+       "            if (fig.imageObj.src) {\n",
723
+       "                (window.URL || window.webkitURL).revokeObjectURL(\n",
724
+       "                    fig.imageObj.src);\n",
725
+       "            }\n",
726
+       "\n",
727
+       "            fig.imageObj.src = (window.URL || window.webkitURL).createObjectURL(\n",
728
+       "                evt.data);\n",
729
+       "            fig.updated_canvas_event();\n",
730
+       "            fig.waiting = false;\n",
731
+       "            return;\n",
732
+       "        }\n",
733
+       "        else if (typeof evt.data === 'string' && evt.data.slice(0, 21) == \"data:image/png;base64\") {\n",
734
+       "            fig.imageObj.src = evt.data;\n",
735
+       "            fig.updated_canvas_event();\n",
736
+       "            fig.waiting = false;\n",
737
+       "            return;\n",
738
+       "        }\n",
739
+       "\n",
740
+       "        var msg = JSON.parse(evt.data);\n",
741
+       "        var msg_type = msg['type'];\n",
742
+       "\n",
743
+       "        // Call the  \"handle_{type}\" callback, which takes\n",
744
+       "        // the figure and JSON message as its only arguments.\n",
745
+       "        try {\n",
746
+       "            var callback = fig[\"handle_\" + msg_type];\n",
747
+       "        } catch (e) {\n",
748
+       "            console.log(\"No handler for the '\" + msg_type + \"' message type: \", msg);\n",
749
+       "            return;\n",
750
+       "        }\n",
751
+       "\n",
752
+       "        if (callback) {\n",
753
+       "            try {\n",
754
+       "                // console.log(\"Handling '\" + msg_type + \"' message: \", msg);\n",
755
+       "                callback(fig, msg);\n",
756
+       "            } catch (e) {\n",
757
+       "                console.log(\"Exception inside the 'handler_\" + msg_type + \"' callback:\", e, e.stack, msg);\n",
758
+       "            }\n",
759
+       "        }\n",
760
+       "    };\n",
761
+       "}\n",
762
+       "\n",
763
+       "// from http://stackoverflow.com/questions/1114465/getting-mouse-location-in-canvas\n",
764
+       "mpl.findpos = function(e) {\n",
765
+       "    //this section is from http://www.quirksmode.org/js/events_properties.html\n",
766
+       "    var targ;\n",
767
+       "    if (!e)\n",
768
+       "        e = window.event;\n",
769
+       "    if (e.target)\n",
770
+       "        targ = e.target;\n",
771
+       "    else if (e.srcElement)\n",
772
+       "        targ = e.srcElement;\n",
773
+       "    if (targ.nodeType == 3) // defeat Safari bug\n",
774
+       "        targ = targ.parentNode;\n",
775
+       "\n",
776
+       "    // jQuery normalizes the pageX and pageY\n",
777
+       "    // pageX,Y are the mouse positions relative to the document\n",
778
+       "    // offset() returns the position of the element relative to the document\n",
779
+       "    var x = e.pageX - $(targ).offset().left;\n",
780
+       "    var y = e.pageY - $(targ).offset().top;\n",
781
+       "\n",
782
+       "    return {\"x\": x, \"y\": y};\n",
783
+       "};\n",
784
+       "\n",
785
+       "/*\n",
786
+       " * return a copy of an object with only non-object keys\n",
787
+       " * we need this to avoid circular references\n",
788
+       " * http://stackoverflow.com/a/24161582/3208463\n",
789
+       " */\n",
790
+       "function simpleKeys (original) {\n",
791
+       "  return Object.keys(original).reduce(function (obj, key) {\n",
792
+       "    if (typeof original[key] !== 'object')\n",
793
+       "        obj[key] = original[key]\n",
794
+       "    return obj;\n",
795
+       "  }, {});\n",
796
+       "}\n",
797
+       "\n",
798
+       "mpl.figure.prototype.mouse_event = function(event, name) {\n",
799
+       "    var canvas_pos = mpl.findpos(event)\n",
800
+       "\n",
801
+       "    if (name === 'button_press')\n",
802
+       "    {\n",
803
+       "        this.canvas.focus();\n",
804
+       "        this.canvas_div.focus();\n",
805
+       "    }\n",
806
+       "\n",
807
+       "    var x = canvas_pos.x * mpl.ratio;\n",
808
+       "    var y = canvas_pos.y * mpl.ratio;\n",
809
+       "\n",
810
+       "    this.send_message(name, {x: x, y: y, button: event.button,\n",
811
+       "                             step: event.step,\n",
812
+       "                             guiEvent: simpleKeys(event)});\n",
813
+       "\n",
814
+       "    /* This prevents the web browser from automatically changing to\n",
815
+       "     * the text insertion cursor when the button is pressed.  We want\n",
816
+       "     * to control all of the cursor setting manually through the\n",
817
+       "     * 'cursor' event from matplotlib */\n",
818
+       "    event.preventDefault();\n",
819
+       "    return false;\n",
820
+       "}\n",
821
+       "\n",
822
+       "mpl.figure.prototype._key_event_extra = function(event, name) {\n",
823
+       "    // Handle any extra behaviour associated with a key event\n",
824
+       "}\n",
825
+       "\n",
826
+       "mpl.figure.prototype.key_event = function(event, name) {\n",
827
+       "\n",
828
+       "    // Prevent repeat events\n",
829
+       "    if (name == 'key_press')\n",
830
+       "    {\n",
831
+       "        if (event.which === this._key)\n",
832
+       "            return;\n",
833
+       "        else\n",
834
+       "            this._key = event.which;\n",
835
+       "    }\n",
836
+       "    if (name == 'key_release')\n",
837
+       "        this._key = null;\n",
838
+       "\n",
839
+       "    var value = '';\n",
840
+       "    if (event.ctrlKey && event.which != 17)\n",
841
+       "        value += \"ctrl+\";\n",
842
+       "    if (event.altKey && event.which != 18)\n",
843
+       "        value += \"alt+\";\n",
844
+       "    if (event.shiftKey && event.which != 16)\n",
845
+       "        value += \"shift+\";\n",
846
+       "\n",
847
+       "    value += 'k';\n",
848
+       "    value += event.which.toString();\n",
849
+       "\n",
850
+       "    this._key_event_extra(event, name);\n",
851
+       "\n",
852
+       "    this.send_message(name, {key: value,\n",
853
+       "                             guiEvent: simpleKeys(event)});\n",
854
+       "    return false;\n",
855
+       "}\n",
856
+       "\n",
857
+       "mpl.figure.prototype.toolbar_button_onclick = function(name) {\n",
858
+       "    if (name == 'download') {\n",
859
+       "        this.handle_save(this, null);\n",
860
+       "    } else {\n",
861
+       "        this.send_message(\"toolbar_button\", {name: name});\n",
862
+       "    }\n",
863
+       "};\n",
864
+       "\n",
865
+       "mpl.figure.prototype.toolbar_button_onmouseover = function(tooltip) {\n",
866
+       "    this.message.textContent = tooltip;\n",
867
+       "};\n",
868
+       "mpl.toolbar_items = [[\"Home\", \"Reset original view\", \"fa fa-home icon-home\", \"home\"], [\"Back\", \"Back to  previous view\", \"fa fa-arrow-left icon-arrow-left\", \"back\"], [\"Forward\", \"Forward to next view\", \"fa fa-arrow-right icon-arrow-right\", \"forward\"], [\"\", \"\", \"\", \"\"], [\"Pan\", \"Pan axes with left mouse, zoom with right\", \"fa fa-arrows icon-move\", \"pan\"], [\"Zoom\", \"Zoom to rectangle\", \"fa fa-square-o icon-check-empty\", \"zoom\"], [\"\", \"\", \"\", \"\"], [\"Download\", \"Download plot\", \"fa fa-floppy-o icon-save\", \"download\"]];\n",
869
+       "\n",
870
+       "mpl.extensions = [\"eps\", \"pdf\", \"png\", \"ps\", \"raw\", \"svg\"];\n",
871
+       "\n",
872
+       "mpl.default_extension = \"png\";var comm_websocket_adapter = function(comm) {\n",
873
+       "    // Create a \"websocket\"-like object which calls the given IPython comm\n",
874
+       "    // object with the appropriate methods. Currently this is a non binary\n",
875
+       "    // socket, so there is still some room for performance tuning.\n",
876
+       "    var ws = {};\n",
877
+       "\n",
878
+       "    ws.close = function() {\n",
879
+       "        comm.close()\n",
880
+       "    };\n",
881
+       "    ws.send = function(m) {\n",
882
+       "        //console.log('sending', m);\n",
883
+       "        comm.send(m);\n",
884
+       "    };\n",
885
+       "    // Register the callback with on_msg.\n",
886
+       "    comm.on_msg(function(msg) {\n",
887
+       "        //console.log('receiving', msg['content']['data'], msg);\n",
888
+       "        // Pass the mpl event to the overriden (by mpl) onmessage function.\n",
889
+       "        ws.onmessage(msg['content']['data'])\n",
890
+       "    });\n",
891
+       "    return ws;\n",
892
+       "}\n",
893
+       "\n",
894
+       "mpl.mpl_figure_comm = function(comm, msg) {\n",
895
+       "    // This is the function which gets called when the mpl process\n",
896
+       "    // starts-up an IPython Comm through the \"matplotlib\" channel.\n",
897
+       "\n",
898
+       "    var id = msg.content.data.id;\n",
899
+       "    // Get hold of the div created by the display call when the Comm\n",
900
+       "    // socket was opened in Python.\n",
901
+       "    var element = $(\"#\" + id);\n",
902
+       "    var ws_proxy = comm_websocket_adapter(comm)\n",
903
+       "\n",
904
+       "    function ondownload(figure, format) {\n",
905
+       "        window.open(figure.imageObj.src);\n",
906
+       "    }\n",
907
+       "\n",
908
+       "    var fig = new mpl.figure(id, ws_proxy,\n",
909
+       "                           ondownload,\n",
910
+       "                           element.get(0));\n",
911
+       "\n",
912
+       "    // Call onopen now - mpl needs it, as it is assuming we've passed it a real\n",
913
+       "    // web socket which is closed, not our websocket->open comm proxy.\n",
914
+       "    ws_proxy.onopen();\n",
915
+       "\n",
916
+       "    fig.parent_element = element.get(0);\n",
917
+       "    fig.cell_info = mpl.find_output_cell(\"<div id='\" + id + \"'></div>\");\n",
918
+       "    if (!fig.cell_info) {\n",
919
+       "        console.error(\"Failed to find cell for figure\", id, fig);\n",
920
+       "        return;\n",
921
+       "    }\n",
922
+       "\n",
923
+       "    var output_index = fig.cell_info[2]\n",
924
+       "    var cell = fig.cell_info[0];\n",
925
+       "\n",
926
+       "};\n",
927
+       "\n",
928
+       "mpl.figure.prototype.handle_close = function(fig, msg) {\n",
929
+       "    var width = fig.canvas.width/mpl.ratio\n",
930
+       "    fig.root.unbind('remove')\n",
931
+       "\n",
932
+       "    // Update the output cell to use the data from the current canvas.\n",
933
+       "    fig.push_to_output();\n",
934
+       "    var dataURL = fig.canvas.toDataURL();\n",
935
+       "    // Re-enable the keyboard manager in IPython - without this line, in FF,\n",
936
+       "    // the notebook keyboard shortcuts fail.\n",
937
+       "    IPython.keyboard_manager.enable()\n",
938
+       "    $(fig.parent_element).html('<img src=\"' + dataURL + '\" width=\"' + width + '\">');\n",
939
+       "    fig.close_ws(fig, msg);\n",
940
+       "}\n",
941
+       "\n",
942
+       "mpl.figure.prototype.close_ws = function(fig, msg){\n",
943
+       "    fig.send_message('closing', msg);\n",
944
+       "    // fig.ws.close()\n",
945
+       "}\n",
946
+       "\n",
947
+       "mpl.figure.prototype.push_to_output = function(remove_interactive) {\n",
948
+       "    // Turn the data on the canvas into data in the output cell.\n",
949
+       "    var width = this.canvas.width/mpl.ratio\n",
950
+       "    var dataURL = this.canvas.toDataURL();\n",
951
+       "    this.cell_info[1]['text/html'] = '<img src=\"' + dataURL + '\" width=\"' + width + '\">';\n",
952
+       "}\n",
953
+       "\n",
954
+       "mpl.figure.prototype.updated_canvas_event = function() {\n",
955
+       "    // Tell IPython that the notebook contents must change.\n",
956
+       "    IPython.notebook.set_dirty(true);\n",
957
+       "    this.send_message(\"ack\", {});\n",
958
+       "    var fig = this;\n",
959
+       "    // Wait a second, then push the new image to the DOM so\n",
960
+       "    // that it is saved nicely (might be nice to debounce this).\n",
961
+       "    setTimeout(function () { fig.push_to_output() }, 1000);\n",
962
+       "}\n",
963
+       "\n",
964
+       "mpl.figure.prototype._init_toolbar = function() {\n",
965
+       "    var fig = this;\n",
966
+       "\n",
967
+       "    var nav_element = $('<div/>')\n",
968
+       "    nav_element.attr('style', 'width: 100%');\n",
969
+       "    this.root.append(nav_element);\n",
970
+       "\n",
971
+       "    // Define a callback function for later on.\n",
972
+       "    function toolbar_event(event) {\n",
973
+       "        return fig.toolbar_button_onclick(event['data']);\n",
974
+       "    }\n",
975
+       "    function toolbar_mouse_event(event) {\n",
976
+       "        return fig.toolbar_button_onmouseover(event['data']);\n",
977
+       "    }\n",
978
+       "\n",
979
+       "    for(var toolbar_ind in mpl.toolbar_items){\n",
980
+       "        var name = mpl.toolbar_items[toolbar_ind][0];\n",
981
+       "        var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
982
+       "        var image = mpl.toolbar_items[toolbar_ind][2];\n",
983
+       "        var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
984
+       "\n",
985
+       "        if (!name) { continue; };\n",
986
+       "\n",
987
+       "        var button = $('<button class=\"btn btn-default\" href=\"#\" title=\"' + name + '\"><i class=\"fa ' + image + ' fa-lg\"></i></button>');\n",
988
+       "        button.click(method_name, toolbar_event);\n",
989
+       "        button.mouseover(tooltip, toolbar_mouse_event);\n",
990
+       "        nav_element.append(button);\n",
991
+       "    }\n",
992
+       "\n",
993
+       "    // Add the status bar.\n",
994
+       "    var status_bar = $('<span class=\"mpl-message\" style=\"text-align:right; float: right;\"/>');\n",
995
+       "    nav_element.append(status_bar);\n",
996
+       "    this.message = status_bar[0];\n",
997
+       "\n",
998
+       "    // Add the close button to the window.\n",
999
+       "    var buttongrp = $('<div class=\"btn-group inline pull-right\"></div>');\n",
1000
+       "    var button = $('<button class=\"btn btn-mini btn-primary\" href=\"#\" title=\"Stop Interaction\"><i class=\"fa fa-power-off icon-remove icon-large\"></i></button>');\n",
1001
+       "    button.click(function (evt) { fig.handle_close(fig, {}); } );\n",
1002
+       "    button.mouseover('Stop Interaction', toolbar_mouse_event);\n",
1003
+       "    buttongrp.append(button);\n",
1004
+       "    var titlebar = this.root.find($('.ui-dialog-titlebar'));\n",
1005
+       "    titlebar.prepend(buttongrp);\n",
1006
+       "}\n",
1007
+       "\n",
1008
+       "mpl.figure.prototype._root_extra_style = function(el){\n",
1009
+       "    var fig = this\n",
1010
+       "    el.on(\"remove\", function(){\n",
1011
+       "\tfig.close_ws(fig, {});\n",
1012
+       "    });\n",
1013
+       "}\n",
1014
+       "\n",
1015
+       "mpl.figure.prototype._canvas_extra_style = function(el){\n",
1016
+       "    // this is important to make the div 'focusable\n",
1017
+       "    el.attr('tabindex', 0)\n",
1018
+       "    // reach out to IPython and tell the keyboard manager to turn it's self\n",
1019
+       "    // off when our div gets focus\n",
1020
+       "\n",
1021
+       "    // location in version 3\n",
1022
+       "    if (IPython.notebook.keyboard_manager) {\n",
1023
+       "        IPython.notebook.keyboard_manager.register_events(el);\n",
1024
+       "    }\n",
1025
+       "    else {\n",
1026
+       "        // location in version 2\n",
1027
+       "        IPython.keyboard_manager.register_events(el);\n",
1028
+       "    }\n",
1029
+       "\n",
1030
+       "}\n",
1031
+       "\n",
1032
+       "mpl.figure.prototype._key_event_extra = function(event, name) {\n",
1033
+       "    var manager = IPython.notebook.keyboard_manager;\n",
1034
+       "    if (!manager)\n",
1035
+       "        manager = IPython.keyboard_manager;\n",
1036
+       "\n",
1037
+       "    // Check for shift+enter\n",
1038
+       "    if (event.shiftKey && event.which == 13) {\n",
1039
+       "        this.canvas_div.blur();\n",
1040
+       "        // select the cell after this one\n",
1041
+       "        var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n",
1042
+       "        IPython.notebook.select(index + 1);\n",
1043
+       "    }\n",
1044
+       "}\n",
1045
+       "\n",
1046
+       "mpl.figure.prototype.handle_save = function(fig, msg) {\n",
1047
+       "    fig.ondownload(fig, null);\n",
1048
+       "}\n",
1049
+       "\n",
1050
+       "\n",
1051
+       "mpl.find_output_cell = function(html_output) {\n",
1052
+       "    // Return the cell and output element which can be found *uniquely* in the notebook.\n",
1053
+       "    // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n",
1054
+       "    // IPython event is triggered only after the cells have been serialised, which for\n",
1055
+       "    // our purposes (turning an active figure into a static one), is too late.\n",
1056
+       "    var cells = IPython.notebook.get_cells();\n",
1057
+       "    var ncells = cells.length;\n",
1058
+       "    for (var i=0; i<ncells; i++) {\n",
1059
+       "        var cell = cells[i];\n",
1060
+       "        if (cell.cell_type === 'code'){\n",
1061
+       "            for (var j=0; j<cell.output_area.outputs.length; j++) {\n",
1062
+       "                var data = cell.output_area.outputs[j];\n",
1063
+       "                if (data.data) {\n",
1064
+       "                    // IPython >= 3 moved mimebundle to data attribute of output\n",
1065
+       "                    data = data.data;\n",
1066
+       "                }\n",
1067
+       "                if (data['text/html'] == html_output) {\n",
1068
+       "                    return [cell, data, j];\n",
1069
+       "                }\n",
1070
+       "            }\n",
1071
+       "        }\n",
1072
+       "    }\n",
1073
+       "}\n",
1074
+       "\n",
1075
+       "// Register the function which deals with the matplotlib target/channel.\n",
1076
+       "// The kernel may be null if the page has been refreshed.\n",
1077
+       "if (IPython.notebook.kernel != null) {\n",
1078
+       "    IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n",
1079
+       "}\n"
1080
+      ],
1081
+      "text/plain": [
1082
+       "<IPython.core.display.Javascript object>"
1083
+      ]
1084
+     },
1085
+     "metadata": {},
1086
+     "output_type": "display_data"
1087
+    },
1088
+    {
1089
+     "data": {
1090
+      "text/html": [