summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-06-08 20:00:20 +0000
committerCoprDistGit <infra@openeuler.org>2023-06-08 20:00:20 +0000
commitbe9ed828c889fc33005bc0b93aad907c9fc0af59 (patch)
tree03780662a67521f7b5d7cd31fa91f63d8b0456e3
parentcd69cd9c5347906d056a470a2f221fc809cd3081 (diff)
automatic import of python-rss-parseropeneuler20.03
-rw-r--r--.gitignore1
-rw-r--r--python-rss-parser.spec468
-rw-r--r--sources2
3 files changed, 415 insertions, 56 deletions
diff --git a/.gitignore b/.gitignore
index 90904bd..1ffdcfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
/rss-parser-0.2.4.tar.gz
+/rss_parser-1.0.0.tar.gz
diff --git a/python-rss-parser.spec b/python-rss-parser.spec
index 5cfa51e..4d974be 100644
--- a/python-rss-parser.spec
+++ b/python-rss-parser.spec
@@ -1,18 +1,16 @@
%global _empty_manifest_terminate_build 0
Name: python-rss-parser
-Version: 0.2.4
+Version: 1.0.0
Release: 1
Summary: Typed pythonic RSS parser
-License: GPLv3
+License: GPL-3.0
URL: https://dhvcc.github.io/rss-parser
-Source0: https://mirrors.nju.edu.cn/pypi/web/packages/89/56/f8f1330323c0c2828071f1024375452da716b6bcf01cdded465c054a6ced/rss-parser-0.2.4.tar.gz
+Source0: https://mirrors.aliyun.com/pypi/web/packages/d1/bf/662e818701a03cc46c4557576c72ac46c9681bb4e4e04c8874b583f92e86/rss_parser-1.0.0.tar.gz
BuildArch: noarch
-Requires: python3-bs4
Requires: python3-pydantic
-Requires: python3-lxml
-Requires: python3-requests
Requires: python3-pytest
+Requires: python3-xmltodict
%description
# Rss parser
@@ -27,11 +25,12 @@ Requires: python3-pytest
[![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
[![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
## About
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
## Installation
@@ -44,34 +43,153 @@ or
```bash
git clone https://github.com/dhvcc/rss-parser.git
cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
```
## Usage
+### Quickstart
+
```python
from rss_parser import Parser
from requests import get
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
# Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
print(item.title)
- print(item.description)
+ print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains `<p>` - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+ channel: None = None # Removing previous channel field
+ custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+ data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
+
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+ "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+ "tag": {
+ "@attr": "1",
+ "@data-value": "data",
+ "#text": "data"
+ }
+}
+```
+
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+ number: Tag[int]
+ string: Tag[str]
+
+m = Model(
+ number=1,
+ string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1 # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself
+
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {} # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'} # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback
+
+# Traceback (most recent call last):
+# ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+# value is not a valid integer (type=type_error.integer)
```
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
## Contributing
Pull requests are welcome. For major changes, please open an issue first
@@ -109,11 +227,12 @@ BuildRequires: python3-pip
[![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
[![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
## About
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
## Installation
@@ -126,34 +245,153 @@ or
```bash
git clone https://github.com/dhvcc/rss-parser.git
cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
```
## Usage
+### Quickstart
+
```python
from rss_parser import Parser
from requests import get
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
# Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
print(item.title)
- print(item.description)
+ print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains `<p>` - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+ channel: None = None # Removing previous channel field
+ custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+ data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
+
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+ "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+ "tag": {
+ "@attr": "1",
+ "@data-value": "data",
+ "#text": "data"
+ }
+}
+```
+
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
+
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+ number: Tag[int]
+ string: Tag[str]
+
+m = Model(
+ number=1,
+ string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1 # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {} # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'} # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback
+
+# Traceback (most recent call last):
+# ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+# value is not a valid integer (type=type_error.integer)
```
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
## Contributing
Pull requests are welcome. For major changes, please open an issue first
@@ -188,11 +426,12 @@ Provides: python3-rss-parser-doc
[![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
[![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
## About
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
## Installation
@@ -205,34 +444,153 @@ or
```bash
git clone https://github.com/dhvcc/rss-parser.git
cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
```
## Usage
+### Quickstart
+
```python
from rss_parser import Parser
from requests import get
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
# Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
print(item.title)
- print(item.description)
+ print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains `<p>` - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+ channel: None = None # Removing previous channel field
+ custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+ data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+ "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+ "tag": {
+ "@attr": "1",
+ "@data-value": "data",
+ "#text": "data"
+ }
+}
```
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
+
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+ number: Tag[int]
+ string: Tag[str]
+
+m = Model(
+ number=1,
+ string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1 # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself
+
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {} # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'} # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback
+
+# Traceback (most recent call last):
+# ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+# value is not a valid integer (type=type_error.integer)
+```
+
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
## Contributing
Pull requests are welcome. For major changes, please open an issue first
@@ -252,7 +610,7 @@ poetry run pre-commit install -t=pre-commit -t=pre-push
%prep
-%autosetup -n rss-parser-0.2.4
+%autosetup -n rss_parser-1.0.0
%build
%py3_build
@@ -266,20 +624,20 @@ if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
pushd %{buildroot}
if [ -d usr/lib ]; then
- find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+ find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/lib64 ]; then
- find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+ find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/bin ]; then
- find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+ find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/sbin ]; then
- find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+ find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
touch doclist.lst
if [ -d usr/share/man ]; then
- find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+ find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
fi
popd
mv %{buildroot}/filelist.lst .
@@ -292,5 +650,5 @@ mv %{buildroot}/doclist.lst .
%{_docdir}/*
%changelog
-* Tue May 30 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.4-1
+* Thu Jun 08 2023 Python_Bot <Python_Bot@openeuler.org> - 1.0.0-1
- Package Spec generated
diff --git a/sources b/sources
index 0f2a6ed..0526af9 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-53bd5a227489dd86ae216b12590e471a rss-parser-0.2.4.tar.gz
+875c685cd764c0d15746e113a2d68df1 rss_parser-1.0.0.tar.gz