diff options
| author | CoprDistGit <infra@openeuler.org> | 2023-06-08 20:00:20 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-06-08 20:00:20 +0000 |
| commit | be9ed828c889fc33005bc0b93aad907c9fc0af59 (patch) | |
| tree | 03780662a67521f7b5d7cd31fa91f63d8b0456e3 | |
| parent | cd69cd9c5347906d056a470a2f221fc809cd3081 (diff) | |
automatic import of python-rss-parser (openeuler20.03)
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | python-rss-parser.spec | 468 | ||||
| -rw-r--r-- | sources | 2 |
3 files changed, 415 insertions, 56 deletions
@@ -1 +1,2 @@ /rss-parser-0.2.4.tar.gz +/rss_parser-1.0.0.tar.gz diff --git a/python-rss-parser.spec b/python-rss-parser.spec index 5cfa51e..4d974be 100644 --- a/python-rss-parser.spec +++ b/python-rss-parser.spec @@ -1,18 +1,16 @@ %global _empty_manifest_terminate_build 0 Name: python-rss-parser -Version: 0.2.4 +Version: 1.0.0 Release: 1 Summary: Typed pythonic RSS parser -License: GPLv3 +License: GPL-3.0 URL: https://dhvcc.github.io/rss-parser -Source0: https://mirrors.nju.edu.cn/pypi/web/packages/89/56/f8f1330323c0c2828071f1024375452da716b6bcf01cdded465c054a6ced/rss-parser-0.2.4.tar.gz +Source0: https://mirrors.aliyun.com/pypi/web/packages/d1/bf/662e818701a03cc46c4557576c72ac46c9681bb4e4e04c8874b583f92e86/rss_parser-1.0.0.tar.gz BuildArch: noarch -Requires: python3-bs4 Requires: python3-pydantic -Requires: python3-lxml -Requires: python3-requests Requires: python3-pytest +Requires: python3-xmltodict %description # Rss parser @@ -27,11 +25,12 @@ Requires: python3-pytest [](https://github.com/dhvcc/rss-parser/blob/master/LICENSE) [](https://dhvcc.github.io/rss-parser#documentation) -[](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22) + + ## About -`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic` +`rss-parser` is typed python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict) ## Installation @@ -44,34 +43,153 @@ or ```bash git clone https://github.com/dhvcc/rss-parser.git cd rss-parser -pip install . 
+poetry build +pip install dist/*.whl ``` ## Usage +### Quickstart + ```python from rss_parser import Parser from requests import get -rss_url = "https://feedforall.com/sample.xml" -xml = get(rss_url) +rss_url = "https://rss.art19.com/apology-line" +response = get(rss_url) -# Limit feed output to 5 items -# To disable limit simply do not provide the argument or use None -parser = Parser(xml=xml.content, limit=5) -feed = parser.parse() +rss = Parser.parse(response.text) -# Print out feed meta data -print(feed.language) -print(feed.version) +# Print out rss meta data +print("Language", rss.channel.language) +print("RSS", rss.version) # Iteratively print feed items -for item in feed.feed: +for item in rss.channel.items: print(item.title) - print(item.description) + print(item.description[:50]) + +# Language en +# RSS 2.0 +# Wondery Presents - Flipping The Bird: Elon vs Twitter +# <p>When Elon Musk posted a video of himself arrivi +# Introducing: The Apology Line +# <p>If you could call a number and say you’re sorry +``` + +Here we can see that description is still somehow has <p> - this is beacause it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so + +```xml +<![CDATA[<p>If you could call ...</p>]]> +``` + +### Overriding schema + +If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser + +```python +from rss_parser.models import XMLBaseModel +from rss_parser.models.rss import RSS +from rss_parser.models.types import Tag + +class CustomSchema(RSS, XMLBaseModel): + channel: None = None # Removing previous channel field + custom: Tag[str] + +with open("tests/samples/custom.xml") as f: + data = f.read() + +rss = Parser.parse(data, schema=CustomSchema) + +print("RSS", rss.version) +print("Custom", rss.custom) + +# RSS 2.0 +# Custom Custom tag data +``` + +### xmltodict + +This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. 
You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict) + +The basic thing you should know is that your data is processed into dictionaries + +For example, this data + +```xml +<tag>content</tag> +``` + +will result in the following + +```python +{ + "tag": "content" +} +``` + +*But*, when handling attributes, the content of the tag will be also a dictionary + +```xml +<tag attr="1" data-value="data">data</tag> +``` + +Turns into + +```python +{ + "tag": { + "@attr": "1", + "@data-value": "data", + "#text": "content" + } +} +``` + +### Tag field + +This is a generic field that handles tags as raw data or a dictonary returned with attributes + +*Although this is a complex class, it forwards most of the methods to it's content attribute, so you don't notice a difference if you're only after the .content value* + +Example +```python +from rss_parser.models import XMLBaseModel +class Model(XMLBaseModel): + number: Tag[int] + string: Tag[str] + +m = Model( + number=1, + string={'@attr': '1', '#text': 'content'}, +) + +m.number.content == 1 # Content value is an integer, as per the generic type + +m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators + +m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself + +type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time + +m.number.attributes == {} # The attributes are empty by default + +m.string.attributes == {'attr': '1'} # But are populated when provided. 
Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted + +# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number + +m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback + +# Traceback (most recent call last): +# ... +# pydantic.error_wrappers.ValidationError: 1 validation error for Model +# number -> content +# value is not a valid integer (type=type_error.integer) ``` +**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`** + ## Contributing Pull requests are welcome. For major changes, please open an issue first @@ -109,11 +227,12 @@ BuildRequires: python3-pip [](https://github.com/dhvcc/rss-parser/blob/master/LICENSE) [](https://dhvcc.github.io/rss-parser#documentation) -[](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22) + + ## About -`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic` +`rss-parser` is typed python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict) ## Installation @@ -126,34 +245,153 @@ or ```bash git clone https://github.com/dhvcc/rss-parser.git cd rss-parser -pip install . 
+poetry build +pip install dist/*.whl ``` ## Usage +### Quickstart + ```python from rss_parser import Parser from requests import get -rss_url = "https://feedforall.com/sample.xml" -xml = get(rss_url) +rss_url = "https://rss.art19.com/apology-line" +response = get(rss_url) -# Limit feed output to 5 items -# To disable limit simply do not provide the argument or use None -parser = Parser(xml=xml.content, limit=5) -feed = parser.parse() +rss = Parser.parse(response.text) -# Print out feed meta data -print(feed.language) -print(feed.version) +# Print out rss meta data +print("Language", rss.channel.language) +print("RSS", rss.version) # Iteratively print feed items -for item in feed.feed: +for item in rss.channel.items: print(item.title) - print(item.description) + print(item.description[:50]) + +# Language en +# RSS 2.0 +# Wondery Presents - Flipping The Bird: Elon vs Twitter +# <p>When Elon Musk posted a video of himself arrivi +# Introducing: The Apology Line +# <p>If you could call a number and say you’re sorry +``` + +Here we can see that description is still somehow has <p> - this is beacause it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so + +```xml +<![CDATA[<p>If you could call ...</p>]]> +``` + +### Overriding schema + +If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser + +```python +from rss_parser.models import XMLBaseModel +from rss_parser.models.rss import RSS +from rss_parser.models.types import Tag + +class CustomSchema(RSS, XMLBaseModel): + channel: None = None # Removing previous channel field + custom: Tag[str] + +with open("tests/samples/custom.xml") as f: + data = f.read() + +rss = Parser.parse(data, schema=CustomSchema) + +print("RSS", rss.version) +print("Custom", rss.custom) + +# RSS 2.0 +# Custom Custom tag data +``` + +### xmltodict + +This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. 
You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict) + +The basic thing you should know is that your data is processed into dictionaries + +For example, this data + +```xml +<tag>content</tag> +``` + +will result in the following + +```python +{ + "tag": "content" +} +``` + +*But*, when handling attributes, the content of the tag will be also a dictionary + +```xml +<tag attr="1" data-value="data">data</tag> +``` + +Turns into + +```python +{ + "tag": { + "@attr": "1", + "@data-value": "data", + "#text": "content" + } +} +``` + +### Tag field + +This is a generic field that handles tags as raw data or a dictonary returned with attributes + +*Although this is a complex class, it forwards most of the methods to it's content attribute, so you don't notice a difference if you're only after the .content value* + +Example + +```python +from rss_parser.models import XMLBaseModel +class Model(XMLBaseModel): + number: Tag[int] + string: Tag[str] + +m = Model( + number=1, + string={'@attr': '1', '#text': 'content'}, +) + +m.number.content == 1 # Content value is an integer, as per the generic type + +m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators + +m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself +type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time + +m.number.attributes == {} # The attributes are empty by default + +m.string.attributes == {'attr': '1'} # But are populated when provided. 
Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted + +# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number + +m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback + +# Traceback (most recent call last): +# ... +# pydantic.error_wrappers.ValidationError: 1 validation error for Model +# number -> content +# value is not a valid integer (type=type_error.integer) ``` +**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`** + ## Contributing Pull requests are welcome. For major changes, please open an issue first @@ -188,11 +426,12 @@ Provides: python3-rss-parser-doc [](https://github.com/dhvcc/rss-parser/blob/master/LICENSE) [](https://dhvcc.github.io/rss-parser#documentation) -[](https://github.com/dhvcc/rss-parser/actions?query=workflow%3A%22Pypi+publish%22) + + ## About -`rss-parser` is typed python RSS parsing module built using `BeautifulSoup` and `pydantic` +`rss-parser` is typed python RSS parsing module built using [pydantic](https://github.com/pydantic/pydantic) and [xmltodict](https://github.com/martinblech/xmltodict) ## Installation @@ -205,34 +444,153 @@ or ```bash git clone https://github.com/dhvcc/rss-parser.git cd rss-parser -pip install . 
+poetry build +pip install dist/*.whl ``` ## Usage +### Quickstart + ```python from rss_parser import Parser from requests import get -rss_url = "https://feedforall.com/sample.xml" -xml = get(rss_url) +rss_url = "https://rss.art19.com/apology-line" +response = get(rss_url) -# Limit feed output to 5 items -# To disable limit simply do not provide the argument or use None -parser = Parser(xml=xml.content, limit=5) -feed = parser.parse() +rss = Parser.parse(response.text) -# Print out feed meta data -print(feed.language) -print(feed.version) +# Print out rss meta data +print("Language", rss.channel.language) +print("RSS", rss.version) # Iteratively print feed items -for item in feed.feed: +for item in rss.channel.items: print(item.title) - print(item.description) + print(item.description[:50]) + +# Language en +# RSS 2.0 +# Wondery Presents - Flipping The Bird: Elon vs Twitter +# <p>When Elon Musk posted a video of himself arrivi +# Introducing: The Apology Line +# <p>If you could call a number and say you’re sorry +``` + +Here we can see that description is still somehow has <p> - this is beacause it's placed as [CDATA](https://www.w3resource.com/xml/CDATA-sections.php) like so + +```xml +<![CDATA[<p>If you could call ...</p>]]> +``` + +### Overriding schema + +If you want to customize the schema or provide a custom one - use `schema` keyword argument of the parser + +```python +from rss_parser.models import XMLBaseModel +from rss_parser.models.rss import RSS +from rss_parser.models.types import Tag + +class CustomSchema(RSS, XMLBaseModel): + channel: None = None # Removing previous channel field + custom: Tag[str] + +with open("tests/samples/custom.xml") as f: + data = f.read() + +rss = Parser.parse(data, schema=CustomSchema) +print("RSS", rss.version) +print("Custom", rss.custom) + +# RSS 2.0 +# Custom Custom tag data +``` + +### xmltodict + +This library uses [xmltodict](https://github.com/martinblech/xmltodict) to parse XML data. 
You can see the detailed documentation [here](https://github.com/martinblech/xmltodict#xmltodict) + +The basic thing you should know is that your data is processed into dictionaries + +For example, this data + +```xml +<tag>content</tag> +``` + +will result in the following + +```python +{ + "tag": "content" +} +``` + +*But*, when handling attributes, the content of the tag will be also a dictionary + +```xml +<tag attr="1" data-value="data">data</tag> +``` + +Turns into + +```python +{ + "tag": { + "@attr": "1", + "@data-value": "data", + "#text": "content" + } +} ``` +### Tag field + +This is a generic field that handles tags as raw data or a dictonary returned with attributes + +*Although this is a complex class, it forwards most of the methods to it's content attribute, so you don't notice a difference if you're only after the .content value* + +Example + +```python +from rss_parser.models import XMLBaseModel +class Model(XMLBaseModel): + number: Tag[int] + string: Tag[str] + +m = Model( + number=1, + string={'@attr': '1', '#text': 'content'}, +) + +m.number.content == 1 # Content value is an integer, as per the generic type + +m.number.content + 10 == m.number + 10 # But you're still able to use the Tag itself in common operators + +m.number.bit_length() == 1 # As it's the case for methods/attributes not found in the Tag itself + +type(m.number), type(m.number.content) == (<class 'rss_parser.models.image.Tag[int]'>, <class 'int'>) # types are NOT the same, however, the interfaces are very similar most of the time + +m.number.attributes == {} # The attributes are empty by default + +m.string.attributes == {'attr': '1'} # But are populated when provided. 
Note that the @ symbol is trimmed from the beginning, however, camelCase is not converted + +# Generic argument types are handled by pydantic - let's try to provide a string for a Tag[int] number + +m = Model(number='not_a_number', string={'@customAttr': 'v', '#text': 'str tag value'}) # This will lead to the following traceback + +# Traceback (most recent call last): +# ... +# pydantic.error_wrappers.ValidationError: 1 validation error for Model +# number -> content +# value is not a valid integer (type=type_error.integer) +``` + +**If you wish to avoid all of the method/attribute forwarding "magic" - you should use `rss_parser.models.types.TagRaw`** + ## Contributing Pull requests are welcome. For major changes, please open an issue first @@ -252,7 +610,7 @@ poetry run pre-commit install -t=pre-commit -t=pre-push %prep -%autosetup -n rss-parser-0.2.4 +%autosetup -n rss_parser-1.0.0 %build %py3_build @@ -266,20 +624,20 @@ if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi pushd %{buildroot} if [ -d usr/lib ]; then - find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst + find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst fi if [ -d usr/lib64 ]; then - find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst + find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst fi if [ -d usr/bin ]; then - find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst + find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst fi if [ -d usr/sbin ]; then - find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst + find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst fi touch doclist.lst if [ -d usr/share/man ]; then - find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst + find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst fi popd mv %{buildroot}/filelist.lst . @@ -292,5 +650,5 @@ mv %{buildroot}/doclist.lst . 
%{_docdir}/* %changelog -* Tue May 30 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.4-1 +* Thu Jun 08 2023 Python_Bot <Python_Bot@openeuler.org> - 1.0.0-1 - Package Spec generated @@ -1 +1 @@ -53bd5a227489dd86ae216b12590e471a rss-parser-0.2.4.tar.gz +875c685cd764c0d15746e113a2d68df1 rss_parser-1.0.0.tar.gz |
