No Description

adafruit-scraper.ipynb 15KB

    # Adafruit scraper notebook, written as "percent"-format cells (each "# %%" line starts a cell).
# %% [markdown]
# # Adafruit Scraper
#
# Quick script that loads a file with a list of product links from
# [Adafruit](https://www.adafruit.com), scrapes each page to grab the title,
# price and image URL, and finally writes all the data into a JSON file.

# %%
# Imports: standard library first, then third-party.
import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import pandas as pd
import requests
from bs4 import BeautifulSoup

# %%
# Configuration: everything a reader might want to tune lives here.
LINKS_PATH = Path("adafruit-links.txt")          # input: one product URL per line
OUTPUT_PATH = Path("adafruit-components.json")   # output: JSON array of product records
REQUEST_TIMEOUT = 10                             # seconds; without it a stalled server hangs the kernel
HTML_PARSER = "html5lib"

# %% [markdown]
# ## Load the product links

# %%
# The links file has no header row. Duplicate URLs are dropped so that no page
# is fetched twice and no product is written twice to the output file.
links = (
    pd.read_csv(LINKS_PATH, sep=" ", header=None, names=["url"])
    .drop_duplicates(subset="url")
    .reset_index(drop=True)
)
links.head()

# %% [markdown]
# ## Scrape each product page

# %%
def scrape_product(url: str, session: requests.Session) -> Dict[str, Optional[str]]:
    """Fetch one product page and extract its title, price and image URL.

    Args:
        url: Address of the product page.
        session: Session used for the request, so connections are reused.

    Returns:
        A record with the keys ``title``, ``price``, ``url`` and ``image_url``.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an error status.
        ValueError: The page lacks the ``<h1>``, ``itemprop="price"`` or
            ``itemprop="image"`` element the record is built from.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Fail on 4xx/5xx here instead of parsing an error page below.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, HTML_PARSER)

    heading = soup.find("h1")
    price_tag = soup.find(itemprop="price")
    image_tag = soup.find(itemprop="image")

    expected = (
        ("h1", heading),
        ('itemprop="price"', price_tag),
        ('itemprop="image"', image_tag),
    )
    missing = [label for label, tag in expected if tag is None]
    if missing:
        raise ValueError("{}: missing element(s): {}".format(url, ", ".join(missing)))

    return {
        # get_text() yields a plain str even when the heading has nested tags.
        "title": heading.get_text(" ", strip=True),
        "price": price_tag.get("content"),
        "url": url,
        "image_url": image_tag.get("src"),
    }

# %%
# Named `products`, not `list`: rebinding the builtin would break every later
# `list(...)` call in this kernel session.
products: List[Dict[str, Optional[str]]] = []
failures: List[Tuple[str, str]] = []

with requests.Session() as session:
    for url in links["url"]:
        try:
            products.append(scrape_product(url, session))
        except (requests.RequestException, ValueError) as error:
            # One bad page must not throw away everything scraped so far;
            # record it and report below.
            failures.append((url, str(error)))

print(f"Scraped {len(products)} of {len(links)} products; {len(failures)} failed.")
for failed_url, reason in failures:
    print(f"  FAILED {failed_url}: {reason}")

pd.DataFrame(products).head()

# %% [markdown]
# ## Export the data

# %%
with OUTPUT_PATH.open("w", encoding="utf-8") as output_file:
    json.dump(products, output_file)

# Read the file back as JSON (not as space-separated CSV, which splits the
# text into hundreds of meaningless columns) to confirm the export round-trips.
with OUTPUT_PATH.open(encoding="utf-8") as output_file:
    components = pd.DataFrame(json.load(output_file))

assert len(components) == len(products), "exported file does not contain every scraped record"
components.head()