|
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adafruit Scraper\n",
"\n",
"Quick script that loads a file with a list of product links from [adafruit](http://www.adafruit.com), then scrapes each page to grab the title, price and image URL, and finally writes all the data into a JSON file."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://www.adafruit.com/product/1782</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://www.adafruit.com/product/1766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>https://www.adafruit.com/product/2652</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>https://www.adafruit.com/product/189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>https://www.adafruit.com/product/439</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" url\n",
"0 https://www.adafruit.com/product/1782\n",
"1 https://www.adafruit.com/product/1766\n",
"2 https://www.adafruit.com/product/2652\n",
"3 https://www.adafruit.com/product/189\n",
"4 https://www.adafruit.com/product/439"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the file of product links into a single-column DataFrame\n",
"import pandas as pd\n",
"links_file = 'adafruit-links.txt'\n",
"data = pd.read_csv(links_file, sep=\" \", header=None)  # the file has no header row\n",
"data.columns = [\"url\"]\n",
"data.head()  # last expression in the cell, so Jupyter renders the first rows"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'title': 'MCP9808 High Accuracy I2C Temperature Sensor Breakout Board', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1782', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1782-00.jpg'}\n",
"{'title': 'Fast Vibration Sensor Switch (Easy to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1766', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1766-00.jpg'}\n",
"{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
"{'title': 'PIR (motion) sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/189', 'image_url': 'https://cdn-shop.adafruit.com/970x728/189-00.jpg'}\n",
"{'title': 'Adafruit TSL2561 Digital Luminosity/Lux/Light Sensor Breakout', 'price': '5.95', 'url': 'https://www.adafruit.com/product/439', 'image_url': 'https://cdn-shop.adafruit.com/970x728/439-00.jpg'}\n",
"{'title': 'Medium Vibration Sensor Switch', 'price': '0.95', 'url': 'https://www.adafruit.com/product/2384', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2384-00.jpg'}\n",
"{'title': 'Membrane 3x4 Matrix Keypad + extras - 3x4', 'price': '3.95', 'url': 'https://www.adafruit.com/product/419', 'image_url': 'https://cdn-shop.adafruit.com/970x728/419-05.jpg'}\n",
"{'title': 'Slow Vibration Sensor Switch (Hard to trigger)', 'price': '0.95', 'url': 'https://www.adafruit.com/product/1767', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1767-00.jpg'}\n",
"{'title': 'Adafruit BMP280 I2C or SPI Barometric Pressure & Altitude Sensor', 'price': '9.95', 'url': 'https://www.adafruit.com/product/2651', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2651-00.jpg'}\n",
"{'title': 'Adafruit Si7021 Temperature & Humidity Sensor Breakout Board', 'price': '6.95', 'url': 'https://www.adafruit.com/product/3251', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3251-00.jpg'}\n",
"{'title': 'Magnetic contact switch (door sensor)', 'price': '3.95', 'url': 'https://www.adafruit.com/product/375', 'image_url': 'https://cdn-shop.adafruit.com/970x728/375-00.jpg'}\n",
"{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
"{'title': 'Tilt ball switch', 'price': '2.00', 'url': 'https://www.adafruit.com/product/173', 'image_url': 'https://cdn-shop.adafruit.com/970x728/173-00.jpg'}\n",
"{'title': 'TMP36 - Analog Temperature sensor - TMP36', 'price': '1.50', 'url': 'https://www.adafruit.com/product/165', 'image_url': 'https://cdn-shop.adafruit.com/970x728/165-00.jpg'}\n",
"{'title': 'Mini 8-Way Rotary Selector Switch - SP8T', 'price': '1.95', 'url': 'https://www.adafruit.com/product/2925', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2925-00.jpg'}\n",
"{'title': 'Waterproof DS18B20 Digital temperature sensor + extras', 'price': '9.95', 'url': 'https://www.adafruit.com/product/381', 'image_url': 'https://cdn-shop.adafruit.com/970x728/381-00.jpg'}\n",
"{'title': 'DS18B20 Digital temperature sensor + extras', 'price': '3.95', 'url': 'https://www.adafruit.com/product/374', 'image_url': 'https://cdn-shop.adafruit.com/970x728/374-00.jpg'}\n",
"{'title': 'Raspberry Pi Zero W Camera Pack - Includes Pi Zero W', 'price': '44.95', 'url': 'https://www.adafruit.com/product/3414', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3414-05.jpg'}\n",
"{'title': 'Mechanical Decade Counters - Small Size - Pack of 5', 'price': '4.95', 'url': 'https://www.adafruit.com/product/1084', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1084-00.jpg'}\n",
"{'title': 'DHT11 basic temperature-humidity sensor + extras', 'price': '5.00', 'url': 'https://www.adafruit.com/product/386', 'image_url': 'https://cdn-shop.adafruit.com/970x728/386-00.jpg'}\n",
"{'title': 'Electroluminescent (EL) Panel - 10cm x 10cm Blue', 'price': '13.95', 'url': 'https://www.adafruit.com/product/624', 'image_url': 'https://cdn-shop.adafruit.com/970x728/624-00.jpg'}\n",
"{'title': 'IDC Breakout Helper - 2x20 (40 pin)', 'price': '2.25', 'url': 'https://www.adafruit.com/product/2270', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2270-04.jpg'}\n",
"{'title': 'Peltier Thermo-Electric Cooler Module - 12V 5A', 'price': '11.95', 'url': 'https://www.adafruit.com/product/1330', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1330-02.jpg'}\n",
"{'title': 'Membrane LED Keypad + extras', 'price': '2.95', 'url': 'https://www.adafruit.com/product/1333', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1333-00.jpg'}\n",
"{'title': 'Peltier Thermo-Electric Cooler Module - 5V 1A', 'price': '14.95', 'url': 'https://www.adafruit.com/product/1331', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1331-04.jpg'}\n",
"{'title': 'Adafruit Feather HUZZAH with ESP8266 WiFi', 'price': '16.95', 'url': 'https://www.adafruit.com/product/2821', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2821-01.jpg'}\n",
"{'title': 'Bi-Color (Red/Green) 24-Bar Bargraph w/I2C Backpack Kit', 'price': '9.95', 'url': 'https://www.adafruit.com/product/1721', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1721-00.jpg'}\n",
"{'title': '10 Segment Light Bar Graph LED Display - Blue - KWL-R1025BB', 'price': '1.95', 'url': 'https://www.adafruit.com/product/1815', 'image_url': 'https://cdn-shop.adafruit.com/970x728/1815-04.jpg'}\n",
"{'title': 'Tower Light - Red Alert Light with Buzzer - 12VDC', 'price': '24.95', 'url': 'https://www.adafruit.com/product/2994', 'image_url': 'https://cdn-shop.adafruit.com/product-videos/1024x768/2994-01.jpg'}\n",
"{'title': 'Miniature 8x8 Red LED Matrix', 'price': '3.95', 'url': 'https://www.adafruit.com/product/454', 'image_url': 'https://cdn-shop.adafruit.com/970x728/454-04.jpg'}\n",
"{'title': 'Small 1.2\" 8x8 Bi-Color (Red/Green) Square LED Matrix', 'price': '7.95', 'url': 'https://www.adafruit.com/product/458', 'image_url': 'https://cdn-shop.adafruit.com/970x728/458-00.jpg'}\n",
"{'title': 'Stereo Bonnet Pack for Raspberry Pi Zero W - Includes Pi Zero W', 'price': '34.95', 'url': 'https://www.adafruit.com/product/3412', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3412-01.jpg'}\n",
"{'title': 'LED Illuminated Pushbutton - 30mm Square', 'price': '3.95', 'url': 'https://www.adafruit.com/product/491', 'image_url': 'https://cdn-shop.adafruit.com/970x728/491-00.jpg'}\n",
"{'title': 'Adafruit 128x64 OLED Bonnet for Raspberry Pi', 'price': '22.50', 'url': 'https://www.adafruit.com/product/3531', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3531-00.jpg'}\n",
"{'title': 'Adafruit Sensiron SHT31-D Temperature & Humidity Sensor Breakout', 'price': '13.95', 'url': 'https://www.adafruit.com/product/2857', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2857-04.jpg'}\n",
"{'title': 'Adafruit PiOLED - 128x32 Monochrome OLED Add-on for Raspberry Pi', 'price': '14.95', 'url': 'https://www.adafruit.com/product/3527', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3527-04.jpg'}\n",
"{'title': 'Micro B USB Cable with LCD Voltage / Current Display', 'price': '7.50', 'url': 'https://www.adafruit.com/product/3388', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3388-01.jpg'}\n",
"{'title': 'Adafruit BME280 I2C or SPI Temperature Humidity Pressure Sensor', 'price': '19.95', 'url': 'https://www.adafruit.com/product/2652', 'image_url': 'https://cdn-shop.adafruit.com/970x728/2652-00.jpg'}\n",
"{'title': 'Adafruit I2S 3W Stereo Speaker Bonnet for Raspberry Pi - Mini Kit', 'price': '12.95', 'url': 'https://www.adafruit.com/product/3346', 'image_url': 'https://cdn-shop.adafruit.com/970x728/3346-01.jpg'}\n"
]
}
],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# Seconds to wait for each product page; without a timeout requests.get can block indefinitely.\n",
"REQUEST_TIMEOUT = 10\n",
"\n",
"# Scraped products, one dict per URL.\n",
"# NOTE: this name shadows the builtin `list`; it is kept because the export cell reads it.\n",
"list = []\n",
"\n",
"for a in data.url:\n",
"    result = requests.get(a, timeout=REQUEST_TIMEOUT)\n",
"    # Fail loudly on 4xx/5xx responses instead of trying to parse an error page.\n",
"    result.raise_for_status()\n",
"    soup = BeautifulSoup(result.content, \"html5lib\")\n",
"    # The title is taken from the first <h1>; price and image come from itemprop-tagged elements.\n",
"    title = soup.find_all(\"h1\")[0].string\n",
"    price = soup.find(itemprop=\"price\").get(\"content\")\n",
"    image_url = soup.find(itemprop=\"image\").get(\"src\")\n",
"    obj = {\n",
"        \"title\": title,\n",
"        \"price\": price,\n",
"        \"url\": a,\n",
"        \"image_url\": image_url\n",
"    }\n",
"    list.append(obj)\n",
"    print(obj)\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>308</th>\n",
" <th>309</th>\n",
" <th>310</th>\n",
" <th>311</th>\n",
" <th>312</th>\n",
" <th>313</th>\n",
" <th>314</th>\n",
" <th>315</th>\n",
" <th>316</th>\n",
" <th>317</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[{\"title\":</td>\n",
" <td>MCP9808 High Accuracy I2C Temperature Sensor B...</td>\n",
" <td>price:</td>\n",
" <td>4.95,</td>\n",
" <td>url:</td>\n",
" <td>https://www.adafruit.com/product/1782,</td>\n",
" <td>image_url:</td>\n",
" <td>https://cdn-shop.adafruit.com/970x728/1782-00....</td>\n",
" <td>{\"title\":</td>\n",
" <td>Fast Vibration Sensor Switch (Easy to trigger),</td>\n",
" <td>...</td>\n",
" <td>image_url:</td>\n",
" <td>https://cdn-shop.adafruit.com/970x728/2652-00....</td>\n",
" <td>{\"title\":</td>\n",
" <td>Adafruit I2S 3W Stereo Speaker Bonnet for Rasp...</td>\n",
" <td>price:</td>\n",
" <td>12.95,</td>\n",
" <td>url:</td>\n",
" <td>https://www.adafruit.com/product/3346,</td>\n",
" <td>image_url:</td>\n",
" <td>https://cdn-shop.adafruit.com/970x728/3346-01....</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 318 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 \\\n",
"0 [{\"title\": MCP9808 High Accuracy I2C Temperature Sensor B... price: \n",
"\n",
" 3 4 5 6 \\\n",
"0 4.95, url: https://www.adafruit.com/product/1782, image_url: \n",
"\n",
" 7 8 \\\n",
"0 https://cdn-shop.adafruit.com/970x728/1782-00.... {\"title\": \n",
"\n",
" 9 \\\n",
"0 Fast Vibration Sensor Switch (Easy to trigger), \n",
"\n",
" ... 308 \\\n",
"0 ... image_url: \n",
"\n",
" 309 310 \\\n",
"0 https://cdn-shop.adafruit.com/970x728/2652-00.... {\"title\": \n",
"\n",
" 311 312 313 314 \\\n",
"0 Adafruit I2S 3W Stereo Speaker Bonnet for Rasp... price: 12.95, url: \n",
"\n",
" 315 316 \\\n",
"0 https://www.adafruit.com/product/3346, image_url: \n",
"\n",
" 317 \n",
"0 https://cdn-shop.adafruit.com/970x728/3346-01.... \n",
"\n",
"[1 rows x 318 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Export Data\n",
"import json\n",
"# `list` is the list of product dicts built by the scraping cell.\n",
"out = json.dumps(list)\n",
"with open('adafruit-components.json', 'w') as f:\n",
"    f.write(out)\n",
"# Read the file back as JSON to check the export. Parsing it as space-separated CSV\n",
"# splits the JSON text into one row with hundreds of fragment columns.\n",
"new_file = pd.read_json('adafruit-components.json')\n",
"new_file.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|