automatic import of python-rss-parser openeuler20.03

author: CoprDistGit <infra@openeuler.org> 2023-06-08 20:00:20 +0000
committer: CoprDistGit <infra@openeuler.org> 2023-06-08 20:00:20 +0000
commit: be9ed828c889fc33005bc0b93aad907c9fc0af59 (patch)
tree: 03780662a67521f7b5d7cd31fa91f63d8b0456e3
parent: cd69cd9c5347906d056a470a2f221fc809cd3081 (diff)
3 files changed, 415 insertions, 56 deletions
diff --git a/.gitignore b/.gitignore
index 90904bd..1ffdcfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /rss-parser-0.2.4.tar.gz
+/rss_parser-1.0.0.tar.gz
diff --git a/python-rss-parser.spec b/python-rss-parser.spec
index 5cfa51e..4d974be 100644
--- a/python-rss-parser.spec
+++ b/python-rss-parser.spec
@@ -1,18 +1,16 @@
 %global _empty_manifest_terminate_build 0
 Name:		python-rss-parser
-Version:	0.2.4
+Version:	1.0.0
 Release:	1
 Summary:	Typed pythonic RSS parser
-License:	GPLv3
+License:	GPL-3.0
 URL:		https://dhvcc.github.io/rss-parser
-Source0:	https://mirrors.nju.edu.cn/pypi/web/packages/89/56/f8f1330323c0c2828071f1024375452da716b6bcf01cdded465c054a6ced/rss-parser-0.2.4.tar.gz
+Source0:	https://mirrors.aliyun.com/pypi/web/packages/d1/bf/662e818701a03cc46c4557576c72ac46c9681bb4e4e04c8874b583f92e86/rss_parser-1.0.0.tar.gz
 BuildArch:	noarch
 
-Requires:	python3-bs4
 Requires:	python3-pydantic
-Requires:	python3-lxml
-Requires:	python3-requests
 Requires:	python3-pytest
+Requires:	python3-xmltodict
 
 %description
 # Rss parser
@@ -27,11 +25,12 @@ Requires:	python3-pytest
 [![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
 [![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
 
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
 
 ## About
 
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
 
 ## Installation
 
@@ -44,34 +43,153 @@ or
 ```bash
 git clone https://github.com/dhvcc/rss-parser.git
 cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
 ```
 
 ## Usage
 
+### Quickstart
+
 ```python
 from rss_parser import Parser
 from requests import get
 
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
 
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
 
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
 
 # Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
     print(item.title)
-    print(item.description)
+    print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains <p> - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+    channel: None = None # Removing previous channel field
+    custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+    data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
+
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+    "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+    "tag": {
+        "@attr": "1",
+        "@data-value": "data",
+        "#text": "data"
+    }
+}
+```
+
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
 
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+     number: Tag[int]
+     string: Tag[str]
+
+m = Model(
+    number=1,
+    string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1  # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10  # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1  # As it's the case for methods/attributes not found in the Tag itself
+
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>)  # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {}  # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'}  # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'})  # This will lead to the following traceback
+
+# Traceback (most recent call last):
+#     ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+#     value is not a valid integer (type=type_error.integer)
 ```
 
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
 ## Contributing
 
 Pull requests are welcome. For major changes, please open an issue first
@@ -109,11 +227,12 @@ BuildRequires:	python3-pip
 [![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
 [![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
 
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
 
 ## About
 
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
 
 ## Installation
 
@@ -126,34 +245,153 @@ or
 ```bash
 git clone https://github.com/dhvcc/rss-parser.git
 cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
 ```
 
 ## Usage
 
+### Quickstart
+
 ```python
 from rss_parser import Parser
 from requests import get
 
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
 
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
 
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
 
 # Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
     print(item.title)
-    print(item.description)
+    print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains <p> - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+    channel: None = None # Removing previous channel field
+    custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+    data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
+
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+    "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+    "tag": {
+        "@attr": "1",
+        "@data-value": "data",
+        "#text": "data"
+    }
+}
+```
+
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
+
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+     number: Tag[int]
+     string: Tag[str]
+
+m = Model(
+    number=1,
+    string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1  # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10  # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1  # As it's the case for methods/attributes not found in the Tag itself
 
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>)  # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {}  # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'}  # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'})  # This will lead to the following traceback
+
+# Traceback (most recent call last):
+#     ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+#     value is not a valid integer (type=type_error.integer)
 ```
 
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
 ## Contributing
 
 Pull requests are welcome. For major changes, please open an issue first
@@ -188,11 +426,12 @@ Provides:	python3-rss-parser-doc
 [![License](https://img.shields.io/pypi/l/rss-parser?color=success)](https://github.com/dhvcc/rss-parser/blob/master/LICENSE)
 [![GitHub Pages](https://badgen.net/github/status/dhvcc/rss-parser/gh-pages?label=docs)](https://dhvcc.github.io/rss-parser#documentation)
 
-[![Pypi publish](https://github.com/dhvcc/rss-parser/workflows/Pypi%20publish/badge.svg)](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22)
+![CI](https://github.com/dhvcc/rss-parser/actions/workflows/ci.yml/badge.svg?branch=master)
+![PyPi publish](https://github.com/dhvcc/rss-parser/actions/workflows/publish_to_pypi.yml/badge.svg?branch=master)
 
 ## About
 
-`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic`
+`rss-parser` is a typed Python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict)
 
 ## Installation
 
@@ -205,34 +444,153 @@ or
 ```bash
 git clone https://github.com/dhvcc/rss-parser.git
 cd rss-parser
-pip install .
+poetry build
+pip install dist/*.whl
 ```
 
 ## Usage
 
+### Quickstart
+
 ```python
 from rss_parser import Parser
 from requests import get
 
-rss_url = "https://feedforall.com/sample.xml"
-xml = get(rss_url)
+rss_url = "https://rss.art19.com/apology-line"
+response = get(rss_url)
 
-# Limit feed output to 5 items
-# To disable limit simply do not provide the argument or use None
-parser = Parser(xml=xml.content, limit=5)
-feed = parser.parse()
+rss = Parser.parse(response.text)
 
-# Print out feed meta data
-print(feed.language)
-print(feed.version)
+# Print out rss meta data
+print("Language", rss.channel.language)
+print("RSS", rss.version)
 
 # Iteratively print feed items
-for item in feed.feed:
+for item in rss.channel.items:
     print(item.title)
-    print(item.description)
+    print(item.description[:50])
+
+# Language en
+# RSS 2.0
+# Wondery Presents - Flipping The Bird: Elon vs Twitter
+# <p>When Elon Musk posted a video of himself arrivi
+# Introducing: The Apology Line
+# <p>If you could call a number and say you’re sorry
+```
+
+Here we can see that the description still contains <p> - this is because it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so
+
+```xml
+<![CDATA[<p>If you could call ...</p>]]>
+```
+
+### Overriding schema
+
+If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser
+
+```python
+from rss_parser.models import XMLBaseModel
+from rss_parser.models.rss import RSS
+from rss_parser.models.types import Tag
+
+class CustomSchema(RSS, XMLBaseModel):
+    channel: None = None # Removing previous channel field
+    custom: Tag[str]
+
+with open("tests/samples/custom.xml") as f:
+    data = f.read()
+
+rss = Parser.parse(data, schema=CustomSchema)
 
+print("RSS", rss.version)
+print("Custom", rss.custom)
+
+# RSS 2.0
+# Custom Custom tag data
+```
+
+### xmltodict
+
+This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict)
+
+The basic thing you should know is that your data is processed into dictionaries
+
+For example, this data
+
+```xml
+<tag>content</tag>
+```
+
+will result in the following
+
+```python
+{
+    "tag": "content"
+}
+```
+
+*But*, when handling attributes, the content of the tag will be also a dictionary
+
+```xml
+<tag attr="1" data-value="data">data</tag>
+```
+
+Turns into
+
+```python
+{
+    "tag": {
+        "@attr": "1",
+        "@data-value": "data",
+        "#text": "data"
+    }
+}
 ```
 
+### Tag field
+
+This is a generic field that handles tags as raw data or a dictionary returned with attributes
+
+*Although this is a complex class, it forwards most of the methods to its content attribute, so you don't notice a difference if you're only after the .content value*
+
+Example
+
+```python
+from rss_parser.models import XMLBaseModel
+class Model(XMLBaseModel):
+     number: Tag[int]
+     string: Tag[str]
+
+m = Model(
+    number=1,
+    string={'@attr': '1', '#text': 'content'},
+)
+
+m.number.content == 1  # Content value is an integer, as per the generic type
+
+m.number.content + 10 == m.number + 10  # But you're still able to use the Tag itself in common operators
+
+m.number.bit_length() == 1  # As it's the case for methods/attributes not found in the Tag itself
+
+type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>)  # types are NOT the same, however, the interfaces are very similar most of the time
+
+m.number.attributes == {}  # The attributes are empty by default
+
+m.string.attributes == {'attr': '1'}  # But are populated when provided. Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted
+
+# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number
+
+m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'})  # This will lead to the following traceback
+
+# Traceback (most recent call last):
+#     ...
+# pydantic.error_wrappers.ValidationError: 1 validation error for Model
+# number -> content
+#     value is not a valid integer (type=type_error.integer)
+```
+
+**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`**
+
 ## Contributing
 
 Pull requests are welcome. For major changes, please open an issue first
@@ -252,7 +610,7 @@ poetry run pre-commit install -t=pre-commit -t=pre-push
 
 
 %prep
-%autosetup -n rss-parser-0.2.4
+%autosetup -n rss_parser-1.0.0
 
 %build
 %py3_build
@@ -266,20 +624,20 @@ if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
 if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
 pushd %{buildroot}
 if [ -d usr/lib ]; then
-	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+	find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
 fi
 if [ -d usr/lib64 ]; then
-	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+	find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
 fi
 if [ -d usr/bin ]; then
-	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+	find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
 fi
 if [ -d usr/sbin ]; then
-	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+	find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
 fi
 touch doclist.lst
 if [ -d usr/share/man ]; then
-	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+	find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
 fi
 popd
 mv %{buildroot}/filelist.lst .
@@ -292,5 +650,5 @@ mv %{buildroot}/doclist.lst .
 %{_docdir}/*
 
 %changelog
-* Tue May 30 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.4-1
+* Thu Jun 08 2023 Python_Bot <Python_Bot@openeuler.org> - 1.0.0-1
 - Package Spec generated
diff --git a/sources b/sources
index 0f2a6ed..0526af9 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-53bd5a227489dd86ae216b12590e471a  rss-parser-0.2.4.tar.gz
+875c685cd764c0d15746e113a2d68df1  rss_parser-1.0.0.tar.gz
author	CoprDistGit <infra@openeuler.org>	2023-06-08 20:00:20 +0000
committer	CoprDistGit <infra@openeuler.org>	2023-06-08 20:00:20 +0000
commit	be9ed828c889fc33005bc0b93aad907c9fc0af59 (patch)
tree	03780662a67521f7b5d7cd31fa91f63d8b0456e3
parent	cd69cd9c5347906d056a470a2f221fc809cd3081 (diff)