From e5f17991e412b94705179c6908c5e8b747269b7d Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Fri, 5 May 2023 05:13:40 +0000 Subject: automatic import of python-robobrowser --- .gitignore | 1 + python-robobrowser.spec | 384 ++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 386 insertions(+) create mode 100644 python-robobrowser.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..90f98f2 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/robobrowser-0.5.3.tar.gz diff --git a/python-robobrowser.spec b/python-robobrowser.spec new file mode 100644 index 0000000..6c8f4b2 --- /dev/null +++ b/python-robobrowser.spec @@ -0,0 +1,384 @@ +%global _empty_manifest_terminate_build 0 +Name: python-robobrowser +Version: 0.5.3 +Release: 1 +Summary: Your friendly neighborhood web scraper +License: MIT +URL: https://github.com/jmcarp/robobrowser +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/f3/53/fd527e78fe8bdf7ec24b3b821c88c2dcbbe63d120c838f80f12ed0a0fac6/robobrowser-0.5.3.tar.gz +BuildArch: noarch + + +%description +Homepage: `http://robobrowser.readthedocs.org/ <http://robobrowser.readthedocs.org/>`_ +RoboBrowser is a simple, Pythonic library for browsing the web without a standalone web browser. RoboBrowser +can fetch a page, click on links and buttons, and fill out and submit forms. If you need to interact with web services +that don't have APIs, RoboBrowser can help. + import re + from robobrowser import RoboBrowser + # Browse to Genius + browser = RoboBrowser(history=True) + browser.open('http://genius.com/') + # Search for Porcupine Tree + form = browser.get_form(action='/search') + form # <RoboForm q=> + form['q'].value = 'porcupine tree' + browser.submit_form(form) + # Look up the first song + songs = browser.select('.song_link') + browser.follow_link(songs[0]) + lyrics = browser.select('.lyrics') + lyrics[0].text # \nHear the sound of music ... 
+ # Back to results page + browser.back() + # Look up my favorite song + song_link = browser.get_link('trains') + browser.follow_link(song_link) + # Can also search HTML using regex patterns + lyrics = browser.find(class_=re.compile(r'\blyrics\b')) + lyrics.text # \nTrain set and match spied under the blind... +RoboBrowser combines the best of two excellent Python libraries: +`Requests <http://docs.python-requests.org/en/latest/>`_ and +`BeautifulSoup <http://www.crummy.com/software/BeautifulSoup/>`_. +RoboBrowser represents browser sessions using Requests and HTML responses +using BeautifulSoup, transparently exposing methods of both libraries: + import re + from robobrowser import RoboBrowser + browser = RoboBrowser(user_agent='a python robot') + browser.open('https://github.com/') + # Inspect the browser session + browser.session.cookies['_gh_sess'] # BAh7Bzo... + browser.session.headers['User-Agent'] # a python robot + # Search the parsed HTML + browser.select('div.teaser-icon') # [<div class="teaser-icon">
+ # <span class="mega-octicon octicon-checklist"></span>
+ # </div>, + # ... + browser.find(class_=re.compile(r'column', re.I)) #