No Description

scraping-example.ipynb 3.0KB

    {
      "cells": [
        {
          "cell_type": "markdown",
          "metadata": {},
          "source": [
            "# Scraping example: one product page\n",
            "\n",
            "Fetches a single Adafruit product page with `requests`, parses it with `BeautifulSoup`, and collects the title, price and image URL into a dict.\n",
            "\n",
            "Outputs are not stored in this notebook; use **Restart Kernel and Run All** to regenerate them. The page is fetched live, so the results depend on the site's current content and markup."
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "import requests\n",
            "from bs4 import BeautifulSoup\n",
            "from IPython.display import HTML, Image"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "PRODUCT_URL = \"https://www.adafruit.com/product/1782\"\n",
            "REQUEST_TIMEOUT_S = 10  # requests waits indefinitely unless a timeout is given\n",
            "THUMBNAIL_PX = 100  # width and height of the preview image"
          ]
        },
        {
          "cell_type": "markdown",
          "metadata": {},
          "source": [
            "## Fetch the page"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "result = requests.get(PRODUCT_URL, timeout=REQUEST_TIMEOUT_S)\n",
            "result.raise_for_status()  # stop on 4xx/5xx instead of parsing an error page\n",
            "page_html = result.content\n",
            "result.status_code"
          ]
        },
        {
          "cell_type": "markdown",
          "metadata": {},
          "source": [
            "## Parse the product details\n",
            "\n",
            "`soup.find(...)` returns `None` when nothing matches, so every lookup goes through `require` to fail with a readable message instead of an `AttributeError`."
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "def require(value, description):\n",
            "    \"\"\"Return `value` unchanged, or raise ValueError if it is None.\n",
            "\n",
            "    `description` names what was being looked up and is used in the error message.\n",
            "    \"\"\"\n",
            "    if value is None:\n",
            "        raise ValueError(\"Could not find {} on {}\".format(description, PRODUCT_URL))\n",
            "    return value"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "soup = BeautifulSoup(page_html, \"html5lib\")\n",
            "# get_text() yields a plain str even when the <h1> wraps nested tags,\n",
            "# a case in which .string would be None.\n",
            "title = require(soup.find(\"h1\"), \"an <h1> title\").get_text(\" \", strip=True)\n",
            "print(title)"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "price_tag = require(soup.find(itemprop=\"price\"), \"the price element\")\n",
            "price = require(price_tag.get(\"content\"), \"a content attribute on the price element\")\n",
            "print(price)"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "image_tag = require(soup.find(itemprop=\"image\"), \"the image element\")\n",
            "image_url = require(image_tag.get(\"src\"), \"a src attribute on the image element\")\n",
            "Image(url=image_url, width=THUMBNAIL_PX, height=THUMBNAIL_PX)"
          ]
        },
        {
          "cell_type": "markdown",
          "metadata": {},
          "source": [
            "## Result"
          ]
        },
        {
          "cell_type": "code",
          "execution_count": null,
          "metadata": {},
          "outputs": [],
          "source": [
            "obj = {\n",
            "    \"title\": title,\n",
            "    \"price\": price,\n",
            "    \"image_url\": image_url,\n",
            "}\n",
            "print(obj)"
          ]
        }
      ],
      "metadata": {
        "kernelspec": {
          "display_name": "Python 3",
          "language": "python",
          "name": "python3"
        },
        "language_info": {
          "codemirror_mode": {
            "name": "ipython",
            "version": 3
          },
          "file_extension": ".py",
          "mimetype": "text/x-python",
          "name": "python",
          "nbconvert_exporter": "python",
          "pygments_lexer": "ipython3",
          "version": "3.6.0"
        }
      },
      "nbformat": 4,
      "nbformat_minor": 2
    }