From 1129bedaafcaf025910ab5ca5cbc26ddd47488bd Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Fri, 5 May 2023 06:20:28 +0000 Subject: automatic import of python-webpreview --- .gitignore | 1 + python-webpreview.spec | 548 +++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 550 insertions(+) create mode 100644 python-webpreview.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..c0b7f48 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/webpreview-1.7.2.tar.gz diff --git a/python-webpreview.spec b/python-webpreview.spec new file mode 100644 index 0000000..887f088 --- /dev/null +++ b/python-webpreview.spec @@ -0,0 +1,548 @@ +%global _empty_manifest_terminate_build 0 +Name: python-webpreview +Version: 1.7.2 +Release: 1 +Summary: Extracts OpenGraph, TwitterCard and Schema properties from a webpage. +License: MIT +URL: https://github.com/ludbek/webpreview +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/6e/81/c8ae4f53ba30a3d36b47c128a3e723e1fa6159a7208655283dcaf73f8d05/webpreview-1.7.2.tar.gz +BuildArch: noarch + +Requires: python3-requests +Requires: python3-beautifulsoup4 + +%description +# webpreview + +For a given URL, `webpreview` extracts its **title**, **description**, and **image url** using +[Open Graph](http://ogp.me/), [Twitter Card](https://dev.twitter.com/cards/overview), or +[Schema](http://schema.org/) meta tags, or, as an alternative, parses it as a generic webpage. + +

+ PyPI - Python Version + PyPI + Build status + Code coverage report +

+ + +## Installation + +```shell +pip install webpreview +``` + +## Usage + +Use the generic `webpreview` method (added in *v1.7.0*) to parse the page independent of its nature. +This method fetches a page and tries to extract a *title, description, and a preview image* from it. + +It first attempts to parse the values from **Open Graph** properties, then it falls back to +**Twitter Card** format, and then to **Schema**. If none of these methods succeed in extracting all +three properties, then the web page's content is parsed using a generic HTML parser. + +```python +>>> from webpreview import webpreview + +>>> p = webpreview("https://en.wikipedia.org/wiki/Enrico_Fermi") +>>> p.title +'Enrico Fermi - Wikipedia' +>>> p.description +'Italian-American physicist (1901–1954)' +>>> p.image +'https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg' + +# Access the parsed fields both as attributes and items +>>> p["url"] == p.url +True + +# Check if all three of the title, description, and image are in the parsing result +>>> p.is_complete() +True + +# Provide page content from somewhere else +>>> content = """ + + + The Dormouse's story + + + +

The Dormouse's story

+ Elsie + + +""" + +# The function's invocation won't make any external calls, +# only relying on the supplied content, unlike the example above +>>> webpreview("aa.com", content=content) +WebPreview(url="http://aa.com", title="The Dormouse's story", description="A Mad Tea-Party story") +``` + +### Using the command line + +When `webpreview` is installed via `pip`, then the accompanying command-line tool is +installed alongside. + +```shell +$ webpreview https://en.wikipedia.org/wiki/Enrico_Fermi +title: Enrico Fermi - Wikipedia +description: Italian-American physicist (1901–1954) +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg + +$ webpreview https://github.com/ --absolute-url +title: GitHub: Where the world builds software +description: GitHub is where over 83 million developers shape the future of software, together. +image: https://github.githubassets.com/images/modules/site/social-cards/github-social.png +``` + +### Using compatibility API + +Before *v1.7.0* the package mainly exposed a different set of the API methods. +All of them are supported and may continue to be used. + +```python +# WARNING: +# The API below is left for BACKWARD COMPATIBILITY ONLY. + +from webpreview import web_preview +title, description, image = web_preview("aurl.com") + +# specifying timeout which gets passed to requests.get() +title, description, image = web_preview("a_slow_url.com", timeout=1000) + +# passing headers +headers = {'User-Agent': 'Mozilla/5.0'} +title, description, image = web_preview("a_slow_url.com", headers=headers) + +# pass html content thus avoiding making http call again to fetch content. 
+content = """Dummy HTML""" +title, description, image = web_preview("aurl.com", content=content) + +# specifying the parser +# by default webpreview uses 'html.parser' +title, description, image = web_preview("aurl.com", content=content, parser='lxml') +``` + +## Run with Docker + +The docker image can be built and run similarly to the command line. +The default entry point is the `webpreview` command-line function. + +```shell +$ docker build -t webpreview . +$ docker run -it --rm webpreview "https://en.m.wikipedia.org/wiki/Enrico_Fermi" +title: Enrico Fermi - Wikipedia +description: Enrico Fermi (Italian: [enˈriːko ˈfermi]; 29 September 1901 – 28 November 1954) was an Italian (later naturalized American) physicist and the creator of the world's first nuclear reactor, the Chicago Pile-1. He has been called the "architect of the nuclear age"[1] and the "architect of the atomic bomb". +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg +``` + +*Note*: built docker image weighs around 210MB. + +## Testing + +```shell +# Execute the tests +poetry run pytest webpreview + +# OR execute until the first failed test +poetry run pytest webpreview -x +``` + +## Setting up development environment + +```shell +# Install a correct minimal supported version of python +pyenv install 3.7.13 + +# Create a virtual environment +# By default, the project already contains a .python-version file that points +# to 3.7.13. +python -m venv .venv + +# Install dependencies +# Poetry will automatically install them into the local .venv +poetry install + +# If you have errors like this: +ERROR: Can not execute `setup.py` since setuptools is not available in the build environment. + +# Then do this: +.venv/bin/pip install --upgrade setuptools +``` + +%package -n python3-webpreview +Summary: Extracts OpenGraph, TwitterCard and Schema properties from a webpage. 
+Provides: python-webpreview +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-webpreview +# webpreview + +For a given URL, `webpreview` extracts its **title**, **description**, and **image url** using +[Open Graph](http://ogp.me/), [Twitter Card](https://dev.twitter.com/cards/overview), or +[Schema](http://schema.org/) meta tags, or, as an alternative, parses it as a generic webpage. + +

+ PyPI - Python Version + PyPI + Build status + Code coverage report +

+ + +## Installation + +```shell +pip install webpreview +``` + +## Usage + +Use the generic `webpreview` method (added in *v1.7.0*) to parse the page independent of its nature. +This method fetches a page and tries to extract a *title, description, and a preview image* from it. + +It first attempts to parse the values from **Open Graph** properties, then it falls back to +**Twitter Card** format, and then to **Schema**. If none of these methods succeed in extracting all +three properties, then the web page's content is parsed using a generic HTML parser. + +```python +>>> from webpreview import webpreview + +>>> p = webpreview("https://en.wikipedia.org/wiki/Enrico_Fermi") +>>> p.title +'Enrico Fermi - Wikipedia' +>>> p.description +'Italian-American physicist (1901–1954)' +>>> p.image +'https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg' + +# Access the parsed fields both as attributes and items +>>> p["url"] == p.url +True + +# Check if all three of the title, description, and image are in the parsing result +>>> p.is_complete() +True + +# Provide page content from somewhere else +>>> content = """ + + + The Dormouse's story + + + +

The Dormouse's story

+ Elsie + + +""" + +# The function's invocation won't make any external calls, +# only relying on the supplied content, unlike the example above +>>> webpreview("aa.com", content=content) +WebPreview(url="http://aa.com", title="The Dormouse's story", description="A Mad Tea-Party story") +``` + +### Using the command line + +When `webpreview` is installed via `pip`, then the accompanying command-line tool is +installed alongside. + +```shell +$ webpreview https://en.wikipedia.org/wiki/Enrico_Fermi +title: Enrico Fermi - Wikipedia +description: Italian-American physicist (1901–1954) +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg + +$ webpreview https://github.com/ --absolute-url +title: GitHub: Where the world builds software +description: GitHub is where over 83 million developers shape the future of software, together. +image: https://github.githubassets.com/images/modules/site/social-cards/github-social.png +``` + +### Using compatibility API + +Before *v1.7.0* the package mainly exposed a different set of the API methods. +All of them are supported and may continue to be used. + +```python +# WARNING: +# The API below is left for BACKWARD COMPATIBILITY ONLY. + +from webpreview import web_preview +title, description, image = web_preview("aurl.com") + +# specifying timeout which gets passed to requests.get() +title, description, image = web_preview("a_slow_url.com", timeout=1000) + +# passing headers +headers = {'User-Agent': 'Mozilla/5.0'} +title, description, image = web_preview("a_slow_url.com", headers=headers) + +# pass html content thus avoiding making http call again to fetch content. 
+content = """Dummy HTML""" +title, description, image = web_preview("aurl.com", content=content) + +# specifying the parser +# by default webpreview uses 'html.parser' +title, description, image = web_preview("aurl.com", content=content, parser='lxml') +``` + +## Run with Docker + +The docker image can be built and run similarly to the command line. +The default entry point is the `webpreview` command-line function. + +```shell +$ docker build -t webpreview . +$ docker run -it --rm webpreview "https://en.m.wikipedia.org/wiki/Enrico_Fermi" +title: Enrico Fermi - Wikipedia +description: Enrico Fermi (Italian: [enˈriːko ˈfermi]; 29 September 1901 – 28 November 1954) was an Italian (later naturalized American) physicist and the creator of the world's first nuclear reactor, the Chicago Pile-1. He has been called the "architect of the nuclear age"[1] and the "architect of the atomic bomb". +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg +``` + +*Note*: built docker image weighs around 210MB. + +## Testing + +```shell +# Execute the tests +poetry run pytest webpreview + +# OR execute until the first failed test +poetry run pytest webpreview -x +``` + +## Setting up development environment + +```shell +# Install a correct minimal supported version of python +pyenv install 3.7.13 + +# Create a virtual environment +# By default, the project already contains a .python-version file that points +# to 3.7.13. +python -m venv .venv + +# Install dependencies +# Poetry will automatically install them into the local .venv +poetry install + +# If you have errors like this: +ERROR: Can not execute `setup.py` since setuptools is not available in the build environment. 
+ +# Then do this: +.venv/bin/pip install --upgrade setuptools +``` + +%package help +Summary: Development documents and examples for webpreview +Provides: python3-webpreview-doc +%description help +# webpreview + +For a given URL, `webpreview` extracts its **title**, **description**, and **image url** using +[Open Graph](http://ogp.me/), [Twitter Card](https://dev.twitter.com/cards/overview), or +[Schema](http://schema.org/) meta tags, or, as an alternative, parses it as a generic webpage. + +

+ PyPI - Python Version + PyPI + Build status + Code coverage report +

+ + +## Installation + +```shell +pip install webpreview +``` + +## Usage + +Use the generic `webpreview` method (added in *v1.7.0*) to parse the page independent of its nature. +This method fetches a page and tries to extract a *title, description, and a preview image* from it. + +It first attempts to parse the values from **Open Graph** properties, then it falls back to +**Twitter Card** format, and then to **Schema**. If none of these methods succeed in extracting all +three properties, then the web page's content is parsed using a generic HTML parser. + +```python +>>> from webpreview import webpreview + +>>> p = webpreview("https://en.wikipedia.org/wiki/Enrico_Fermi") +>>> p.title +'Enrico Fermi - Wikipedia' +>>> p.description +'Italian-American physicist (1901–1954)' +>>> p.image +'https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg' + +# Access the parsed fields both as attributes and items +>>> p["url"] == p.url +True + +# Check if all three of the title, description, and image are in the parsing result +>>> p.is_complete() +True + +# Provide page content from somewhere else +>>> content = """ + + + The Dormouse's story + + + +

The Dormouse's story

+ Elsie + + +""" + +# The function's invocation won't make any external calls, +# only relying on the supplied content, unlike the example above +>>> webpreview("aa.com", content=content) +WebPreview(url="http://aa.com", title="The Dormouse's story", description="A Mad Tea-Party story") +``` + +### Using the command line + +When `webpreview` is installed via `pip`, then the accompanying command-line tool is +installed alongside. + +```shell +$ webpreview https://en.wikipedia.org/wiki/Enrico_Fermi +title: Enrico Fermi - Wikipedia +description: Italian-American physicist (1901–1954) +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg + +$ webpreview https://github.com/ --absolute-url +title: GitHub: Where the world builds software +description: GitHub is where over 83 million developers shape the future of software, together. +image: https://github.githubassets.com/images/modules/site/social-cards/github-social.png +``` + +### Using compatibility API + +Before *v1.7.0* the package mainly exposed a different set of the API methods. +All of them are supported and may continue to be used. + +```python +# WARNING: +# The API below is left for BACKWARD COMPATIBILITY ONLY. + +from webpreview import web_preview +title, description, image = web_preview("aurl.com") + +# specifying timeout which gets passed to requests.get() +title, description, image = web_preview("a_slow_url.com", timeout=1000) + +# passing headers +headers = {'User-Agent': 'Mozilla/5.0'} +title, description, image = web_preview("a_slow_url.com", headers=headers) + +# pass html content thus avoiding making http call again to fetch content. 
+content = """Dummy HTML""" +title, description, image = web_preview("aurl.com", content=content) + +# specifying the parser +# by default webpreview uses 'html.parser' +title, description, image = web_preview("aurl.com", content=content, parser='lxml') +``` + +## Run with Docker + +The docker image can be built and run similarly to the command line. +The default entry point is the `webpreview` command-line function. + +```shell +$ docker build -t webpreview . +$ docker run -it --rm webpreview "https://en.m.wikipedia.org/wiki/Enrico_Fermi" +title: Enrico Fermi - Wikipedia +description: Enrico Fermi (Italian: [enˈriːko ˈfermi]; 29 September 1901 – 28 November 1954) was an Italian (later naturalized American) physicist and the creator of the world's first nuclear reactor, the Chicago Pile-1. He has been called the "architect of the nuclear age"[1] and the "architect of the atomic bomb". +image: https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Enrico_Fermi_1943-49.jpg/1200px-Enrico_Fermi_1943-49.jpg +``` + +*Note*: built docker image weighs around 210MB. + +## Testing + +```shell +# Execute the tests +poetry run pytest webpreview + +# OR execute until the first failed test +poetry run pytest webpreview -x +``` + +## Setting up development environment + +```shell +# Install a correct minimal supported version of python +pyenv install 3.7.13 + +# Create a virtual environment +# By default, the project already contains a .python-version file that points +# to 3.7.13. +python -m venv .venv + +# Install dependencies +# Poetry will automatically install them into the local .venv +poetry install + +# If you have errors like this: +ERROR: Can not execute `setup.py` since setuptools is not available in the build environment. 
+ +# Then do this: +.venv/bin/pip install --upgrade setuptools +``` + +%prep +%autosetup -n webpreview-1.7.2 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-webpreview -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Fri May 05 2023 Python_Bot - 1.7.2-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..e08bae1 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +8432d022f20ca6872680ef6c4237b630 webpreview-1.7.2.tar.gz -- cgit v1.2.3