diff options
author | CoprDistGit <infra@openeuler.org> | 2023-04-10 18:47:56 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-04-10 18:47:56 +0000 |
commit | 380b1f2ac760063c1aeb51a9bb6bb8ebe3eb7942 (patch) | |
tree | a89db6ee17ab0fe2d3a5cdcd85737ac36abd1d1e | |
parent | 0597bfe8b779b8950b7d7031468d1c1d7ff41ca1 (diff) |
automatic import of python-dataclasses-avroschema
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-dataclasses-avroschema.spec | 1155 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 1157 insertions, 0 deletions
@@ -0,0 +1 @@ +/dataclasses_avroschema-0.41.1.tar.gz diff --git a/python-dataclasses-avroschema.spec b/python-dataclasses-avroschema.spec new file mode 100644 index 0000000..58f0789 --- /dev/null +++ b/python-dataclasses-avroschema.spec @@ -0,0 +1,1155 @@ +%global _empty_manifest_terminate_build 0 +Name: python-dataclasses-avroschema +Version: 0.41.1 +Release: 1 +Summary: Generate Avro Schemas from Python classes. Serialize/Deserialize python instances with avro schemas +License: MIT +URL: https://pypi.org/project/dataclasses-avroschema/ +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/5a/c5/f77616222dd599b52c547aecfee52afabc9bd6b09c3b4291637b0fec8653/dataclasses_avroschema-0.41.1.tar.gz +BuildArch: noarch + +Requires: python3-fastavro +Requires: python3-inflect +Requires: python3-pytz +Requires: python3-dacite +Requires: python3-faker +Requires: python3-stringcase +Requires: python3-pydantic +Requires: python3-dc-avro +Requires: python3-faust-streaming + +%description +# Dataclasses Avro Schema Generator + +Generate [Avro](https://avro.apache.org/docs/1.8.2/spec.html) Schemas from a Python class + +[](https://github.com/marcosschroh/dataclasses-avroschema/actions/workflows/tests.yaml) +[](https://actions-badge.atrox.dev/marcosschroh/dataclasses-avroschema/goto?ref=master) +[](https://github.com/marcosschroh/dataclasses-avroschema/blob/master/LICENSE) +[](https://codecov.io/gh/marcosschroh/dataclasses-avroschema) + + +## Requirements + +`python 3.7+` + +## Installation + +```bash +pip install dataclasses-avroschema +``` + +or with `pydantic` funcionalities + +```bash +pip install 'dataclasses-avroschema[pydantic]' +``` + +or with command line [dc-avro](https://marcosschroh.github.io/dc-avro/) + +```bash +pip install 'dataclasses-avroschema[cli]' +``` + +## Documentation + +https://marcosschroh.github.io/dataclasses-avroschema/ + +## Usage + +### Generating the avro schema + +```python +from dataclasses import dataclass +import enum + +import typing + +from 
dataclasses_avroschema import AvroModel, types + + +class FavoriteColor(enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclass +class User(AvroModel): + "An User" + name: str + age: int + pets: typing.List[str] + accounts: typing.Dict[str, int] + favorite_colors: FavoriteColor + country: str = "Argentina" + address: str = None + + class Meta: + namespace = "User.v1" + aliases = ["user-v1", "super user"] + +User.avro_schema() + +'{ + "type": "record", + "name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": "array", "items": "string"}, + {"name": "accounts", "type": "map", "values": "long"}, + {"name": "favorite_color", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["Blue", "Yellow", "Green"]}} + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +User.avro_schema_to_python() + +{ + "type": "record", + "name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["BLUE", "YELLOW", "GREEN"]}}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": None} + ], +} +``` + +### Serialization to avro or avro-json and json payload + +For serialization is neccesary to use python class/dataclasses instance + +```python +from dataclasses import dataclass + +import typing + +from dataclasses_avroschema import AvroModel + + +@dataclass +class 
Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +address_data = { + "street": "test", + "street_number": 10, +} + +# create an Address instance +address = Address(**address_data) + +data_user = { + "name": "john", + "age": 20, + "addresses": [address], +} + +# create an User instance +user = User(**data_user) + +user.serialize() +# >>> b"\x08john(\x02\x08test\x14\x00" + +user.serialize(serialization_type="avro-json") +# >>> b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# Get the json from the instance +user.to_json() +# >>> '{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# Get a python dict +user.to_dict() +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +``` + +### Deserialization + +Deserialization could take place with an instance dataclass or the dataclass itself. Can return the dict representation or a new class instance + +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +avro_binary = b"\x08john(\x02\x08test\x14\x00" +avro_json_binary = b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# return a new class instance!! +User.deserialize(avro_binary) +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_binary, create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +# return a new class instance!! 
+User.deserialize(avro_json_binary, serialization_type="avro-json") +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_json_binary, serialization_type="avro-json", create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} +``` + +## Pydantic integration + +To add `dataclasses-avroschema` functionality to `pydantic` you only need to replace `BaseModel` by `AvroBaseModel`: + +```python +import typing +import enum +import dataclasses + +from dataclasses_avroschema.avrodantic import AvroBaseModel + +from pydantic import Field + + +class FavoriteColor(str, enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclasses.dataclass +class UserAdvance(AvroBaseModel): + name: str + age: int + pets: typing.List[str] = Field(default_factory=lambda: ["dog", "cat"]) + accounts: typing.Dict[str, int] = Field(default_factory=lambda: {"key": 1}) + has_car: bool = False + favorite_colors: FavoriteColor = FavoriteColor.BLUE + country: str = "Argentina" + address: str = None + + class Meta: + schema_doc = False + + +# Avro schema +UserAdvance.avro_schema() +'{ + "type": "record", + "name": "UserAdvance", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}, "default": ["dog", "cat"]}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}, "default": {"key": 1}}, + {"name": "has_car", "type": "boolean", "default": false}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "favorite_color", "symbols": ["BLUE", "YELLOW", "GREEN"]}, "default": "BLUE"}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +user = UserAdvance(name="bond", age=50) + +# pydantic +user.dict() +# >>> {'name': 
'bond', 'age': 50, 'pets': ['dog', 'cat'], 'accounts': {'key': 1}, 'has_car': False, 'favorite_colors': <FavoriteColor.BLUE: 'BLUE'>, 'country': 'Argentina', 'address': None} + +# pydantic +user.json() +# >>> '{"name": "bond", "age": 50, "pets": ["dog", "cat"], "accounts": {"key": 1}, "has_car": false, "favorite_colors": "BLUE", "country": "Argentina", "address": null}' + +# pydantic +user = UserAdvance(name="bond") + +# ValidationError: 1 validation error for UserAdvance +# age +# field required (type=value_error.missing) + + +# dataclasses-avroschema +event = user.serialize() +print(event) +# >>> b'\x08bondd\x04\x06dog\x06cat\x00\x02\x06key\x02\x00\x00\x00\x12Argentina\x00' + +UserAdvance.deserialize(data=event) +# >>> UserAdvance(name='bond', age=50, pets=['dog', 'cat'], accounts={'key': 1}, has_car=False, favorite_colors=<FavoriteColor.BLUE: 'BLUE'>, country='Argentina', address=None) +``` + +## Examples with python streaming drivers (kafka and redis) + +Under [examples](https://github.com/marcosschroh/dataclasses-avroschema/tree/master/examples) folder you can find 3 different kafka examples, one with [aiokafka](https://github.com/aio-libs/aiokafka) (`async`) showing the simplest use case when a `AvroModel` instance is serialized and sent through kafka, and the event is consumed. +The other two examples are `sync` using the [kafka-python](https://github.com/dpkp/kafka-python) driver, where the `avro-json` serialization and `schema evolution` (`FULL` compatibility) is shown. +Also, there are two `redis` examples using `redis streams` with [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) + +## Factory and fixtures + +[Dataclasses Avro Schema](https://github.com/marcosschroh/dataclasses-avroschema) also includes a `factory` feature, so you can generate `fast` python instances and use them, for example, to test your data streaming pipelines. Instances can be generated using the `fake` method. 
+ +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + + +Address.fake() +# >>>> Address(street='PxZJILDRgbXyhWrrPWxQ', street_number=2067) + +User.fake() +# >>>> User(name='VGSBbOGfSGjkMDnefHIZ', age=8974, addresses=[Address(street='vNpPYgesiHUwwzGcmMiS', street_number=4790)]) +``` + +## Features + +* [x] Primitive types: int, long, double, float, boolean, string and null support +* [x] Complex types: enum, array, map, fixed, unions and records support +* [x] `typing.Annotated` supported +* [x] Logical Types: date, time (millis and micro), datetime (millis and micro), uuid support +* [X] Schema relations (oneToOne, oneToMany) +* [X] Recursive Schemas +* [X] Generate Avro Schemas from `faust.Record` +* [X] Instance serialization correspondent to `avro schema` generated +* [X] Data deserialization. Return python dict or class instance +* [X] Generate json from python class instance +* [X] Case Schemas +* [X] Generate models from `avsc` files +* [X] Examples of integration with `kafka` drivers: [aiokafka](https://github.com/aio-libs/aiokafka), [kafka-python](https://github.com/dpkp/kafka-python) +* [X] Example of integration with `redis` drivers: [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) +* [X] Factory instances +* [X] [Pydantic](https://pydantic-docs.helpmanual.io/) integration + +## Development + +[Poetry](https://python-poetry.org/docs/) is needed to install the dependencies and develop locally + +1. Install dependencies: `poetry install` +2. Code linting: `./scripts/format` +3. 
Run tests: `./scripts/test` + +For commit messages we use [commitizen](https://commitizen-tools.github.io/commitizen/) in order to standardize a way of committing rules + + +%package -n python3-dataclasses-avroschema +Summary: Generate Avro Schemas from Python classes. Serialize/Deserialize python instances with avro schemas +Provides: python-dataclasses-avroschema +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-dataclasses-avroschema +# Dataclasses Avro Schema Generator + +Generate [Avro](https://avro.apache.org/docs/1.8.2/spec.html) Schemas from a Python class + +[](https://github.com/marcosschroh/dataclasses-avroschema/actions/workflows/tests.yaml) +[](https://actions-badge.atrox.dev/marcosschroh/dataclasses-avroschema/goto?ref=master) +[](https://github.com/marcosschroh/dataclasses-avroschema/blob/master/LICENSE) +[](https://codecov.io/gh/marcosschroh/dataclasses-avroschema) + + +## Requirements + +`python 3.7+` + +## Installation + +```bash +pip install dataclasses-avroschema +``` + +or with `pydantic` funcionalities + +```bash +pip install 'dataclasses-avroschema[pydantic]' +``` + +or with command line [dc-avro](https://marcosschroh.github.io/dc-avro/) + +```bash +pip install 'dataclasses-avroschema[cli]' +``` + +## Documentation + +https://marcosschroh.github.io/dataclasses-avroschema/ + +## Usage + +### Generating the avro schema + +```python +from dataclasses import dataclass +import enum + +import typing + +from dataclasses_avroschema import AvroModel, types + + +class FavoriteColor(enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclass +class User(AvroModel): + "An User" + name: str + age: int + pets: typing.List[str] + accounts: typing.Dict[str, int] + favorite_colors: FavoriteColor + country: str = "Argentina" + address: str = None + + class Meta: + namespace = "User.v1" + aliases = ["user-v1", "super user"] + +User.avro_schema() + +'{ + "type": "record", + 
"name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": "array", "items": "string"}, + {"name": "accounts", "type": "map", "values": "long"}, + {"name": "favorite_color", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["Blue", "Yellow", "Green"]}} + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +User.avro_schema_to_python() + +{ + "type": "record", + "name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["BLUE", "YELLOW", "GREEN"]}}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": None} + ], +} +``` + +### Serialization to avro or avro-json and json payload + +For serialization is neccesary to use python class/dataclasses instance + +```python +from dataclasses import dataclass + +import typing + +from dataclasses_avroschema import AvroModel + + +@dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +address_data = { + "street": "test", + "street_number": 10, +} + +# create an Address instance +address = Address(**address_data) + +data_user = { + "name": "john", + "age": 20, + "addresses": [address], +} + +# create an User instance +user = User(**data_user) + +user.serialize() +# 
>>> b"\x08john(\x02\x08test\x14\x00" + +user.serialize(serialization_type="avro-json") +# >>> b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# Get the json from the instance +user.to_json() +# >>> '{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# Get a python dict +user.to_dict() +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +``` + +### Deserialization + +Deserialization could take place with an instance dataclass or the dataclass itself. Can return the dict representation or a new class instance + +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +avro_binary = b"\x08john(\x02\x08test\x14\x00" +avro_json_binary = b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# return a new class instance!! +User.deserialize(avro_binary) +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_binary, create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +# return a new class instance!! 
+User.deserialize(avro_json_binary, serialization_type="avro-json") +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_json_binary, serialization_type="avro-json", create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} +``` + +## Pydantic integration + +To add `dataclasses-avroschema` functionality to `pydantic` you only need to replace `BaseModel` by `AvroBaseModel`: + +```python +import typing +import enum +import dataclasses + +from dataclasses_avroschema.avrodantic import AvroBaseModel + +from pydantic import Field + + +class FavoriteColor(str, enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclasses.dataclass +class UserAdvance(AvroBaseModel): + name: str + age: int + pets: typing.List[str] = Field(default_factory=lambda: ["dog", "cat"]) + accounts: typing.Dict[str, int] = Field(default_factory=lambda: {"key": 1}) + has_car: bool = False + favorite_colors: FavoriteColor = FavoriteColor.BLUE + country: str = "Argentina" + address: str = None + + class Meta: + schema_doc = False + + +# Avro schema +UserAdvance.avro_schema() +'{ + "type": "record", + "name": "UserAdvance", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}, "default": ["dog", "cat"]}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}, "default": {"key": 1}}, + {"name": "has_car", "type": "boolean", "default": false}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "favorite_color", "symbols": ["BLUE", "YELLOW", "GREEN"]}, "default": "BLUE"}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +user = UserAdvance(name="bond", age=50) + +# pydantic +user.dict() +# >>> {'name': 
'bond', 'age': 50, 'pets': ['dog', 'cat'], 'accounts': {'key': 1}, 'has_car': False, 'favorite_colors': <FavoriteColor.BLUE: 'BLUE'>, 'country': 'Argentina', 'address': None} + +# pydantic +user.json() +# >>> '{"name": "bond", "age": 50, "pets": ["dog", "cat"], "accounts": {"key": 1}, "has_car": false, "favorite_colors": "BLUE", "country": "Argentina", "address": null}' + +# pydantic +user = UserAdvance(name="bond") + +# ValidationError: 1 validation error for UserAdvance +# age +# field required (type=value_error.missing) + + +# dataclasses-avroschema +event = user.serialize() +print(event) +# >>> b'\x08bondd\x04\x06dog\x06cat\x00\x02\x06key\x02\x00\x00\x00\x12Argentina\x00' + +UserAdvance.deserialize(data=event) +# >>> UserAdvance(name='bond', age=50, pets=['dog', 'cat'], accounts={'key': 1}, has_car=False, favorite_colors=<FavoriteColor.BLUE: 'BLUE'>, country='Argentina', address=None) +``` + +## Examples with python streaming drivers (kafka and redis) + +Under [examples](https://github.com/marcosschroh/dataclasses-avroschema/tree/master/examples) folder you can find 3 different kafka examples, one with [aiokafka](https://github.com/aio-libs/aiokafka) (`async`) showing the simplest use case when a `AvroModel` instance is serialized and sent through kafka, and the event is consumed. +The other two examples are `sync` using the [kafka-python](https://github.com/dpkp/kafka-python) driver, where the `avro-json` serialization and `schema evolution` (`FULL` compatibility) is shown. +Also, there are two `redis` examples using `redis streams` with [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) + +## Factory and fixtures + +[Dataclasses Avro Schema](https://github.com/marcosschroh/dataclasses-avroschema) also includes a `factory` feature, so you can generate `fast` python instances and use them, for example, to test your data streaming pipelines. Instances can be generated using the `fake` method. 
+ +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + + +Address.fake() +# >>>> Address(street='PxZJILDRgbXyhWrrPWxQ', street_number=2067) + +User.fake() +# >>>> User(name='VGSBbOGfSGjkMDnefHIZ', age=8974, addresses=[Address(street='vNpPYgesiHUwwzGcmMiS', street_number=4790)]) +``` + +## Features + +* [x] Primitive types: int, long, double, float, boolean, string and null support +* [x] Complex types: enum, array, map, fixed, unions and records support +* [x] `typing.Annotated` supported +* [x] Logical Types: date, time (millis and micro), datetime (millis and micro), uuid support +* [X] Schema relations (oneToOne, oneToMany) +* [X] Recursive Schemas +* [X] Generate Avro Schemas from `faust.Record` +* [X] Instance serialization correspondent to `avro schema` generated +* [X] Data deserialization. Return python dict or class instance +* [X] Generate json from python class instance +* [X] Case Schemas +* [X] Generate models from `avsc` files +* [X] Examples of integration with `kafka` drivers: [aiokafka](https://github.com/aio-libs/aiokafka), [kafka-python](https://github.com/dpkp/kafka-python) +* [X] Example of integration with `redis` drivers: [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) +* [X] Factory instances +* [X] [Pydantic](https://pydantic-docs.helpmanual.io/) integration + +## Development + +[Poetry](https://python-poetry.org/docs/) is needed to install the dependencies and develop locally + +1. Install dependencies: `poetry install` +2. Code linting: `./scripts/format` +3. 
Run tests: `./scripts/test` + +For commit messages we use [commitizen](https://commitizen-tools.github.io/commitizen/) in order to standardize a way of committing rules + + +%package help +Summary: Development documents and examples for dataclasses-avroschema +Provides: python3-dataclasses-avroschema-doc +%description help +# Dataclasses Avro Schema Generator + +Generate [Avro](https://avro.apache.org/docs/1.8.2/spec.html) Schemas from a Python class + +[](https://github.com/marcosschroh/dataclasses-avroschema/actions/workflows/tests.yaml) +[](https://actions-badge.atrox.dev/marcosschroh/dataclasses-avroschema/goto?ref=master) +[](https://github.com/marcosschroh/dataclasses-avroschema/blob/master/LICENSE) +[](https://codecov.io/gh/marcosschroh/dataclasses-avroschema) + + +## Requirements + +`python 3.7+` + +## Installation + +```bash +pip install dataclasses-avroschema +``` + +or with `pydantic` funcionalities + +```bash +pip install 'dataclasses-avroschema[pydantic]' +``` + +or with command line [dc-avro](https://marcosschroh.github.io/dc-avro/) + +```bash +pip install 'dataclasses-avroschema[cli]' +``` + +## Documentation + +https://marcosschroh.github.io/dataclasses-avroschema/ + +## Usage + +### Generating the avro schema + +```python +from dataclasses import dataclass +import enum + +import typing + +from dataclasses_avroschema import AvroModel, types + + +class FavoriteColor(enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclass +class User(AvroModel): + "An User" + name: str + age: int + pets: typing.List[str] + accounts: typing.Dict[str, int] + favorite_colors: FavoriteColor + country: str = "Argentina" + address: str = None + + class Meta: + namespace = "User.v1" + aliases = ["user-v1", "super user"] + +User.avro_schema() + +'{ + "type": "record", + "name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": 
"long"}, + {"name": "pets", "type": "array", "items": "string"}, + {"name": "accounts", "type": "map", "values": "long"}, + {"name": "favorite_color", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["Blue", "Yellow", "Green"]}} + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +User.avro_schema_to_python() + +{ + "type": "record", + "name": "User", + "doc": "An User", + "namespace": "User.v1", + "aliases": ["user-v1", "super user"], + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "FavoriteColor", "symbols": ["BLUE", "YELLOW", "GREEN"]}}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": None} + ], +} +``` + +### Serialization to avro or avro-json and json payload + +For serialization is neccesary to use python class/dataclasses instance + +```python +from dataclasses import dataclass + +import typing + +from dataclasses_avroschema import AvroModel + + +@dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +address_data = { + "street": "test", + "street_number": 10, +} + +# create an Address instance +address = Address(**address_data) + +data_user = { + "name": "john", + "age": 20, + "addresses": [address], +} + +# create an User instance +user = User(**data_user) + +user.serialize() +# >>> b"\x08john(\x02\x08test\x14\x00" + +user.serialize(serialization_type="avro-json") +# >>> b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 
10}]}' + +# Get the json from the instance +user.to_json() +# >>> '{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# Get a python dict +user.to_dict() +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +``` + +### Deserialization + +Deserialization could take place with an instance dataclass or the dataclass itself. Can return the dict representation or a new class instance + +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + +avro_binary = b"\x08john(\x02\x08test\x14\x00" +avro_json_binary = b'{"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]}' + +# return a new class instance!! +User.deserialize(avro_binary) +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_binary, create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} + +# return a new class instance!! 
+User.deserialize(avro_json_binary, serialization_type="avro-json") +# >>>> User(name='john', age=20, addresses=[Address(street='test', street_number=10)]) + +# return a python dict +User.deserialize(avro_json_binary, serialization_type="avro-json", create_instance=False) +# >>> {"name": "john", "age": 20, "addresses": [{"street": "test", "street_number": 10}]} +``` + +## Pydantic integration + +To add `dataclasses-avroschema` functionality to `pydantic` you only need to replace `BaseModel` by `AvroBaseModel`: + +```python +import typing +import enum +import dataclasses + +from dataclasses_avroschema.avrodantic import AvroBaseModel + +from pydantic import Field + + +class FavoriteColor(str, enum.Enum): + BLUE = "BLUE" + YELLOW = "YELLOW" + GREEN = "GREEN" + + +@dataclasses.dataclass +class UserAdvance(AvroBaseModel): + name: str + age: int + pets: typing.List[str] = Field(default_factory=lambda: ["dog", "cat"]) + accounts: typing.Dict[str, int] = Field(default_factory=lambda: {"key": 1}) + has_car: bool = False + favorite_colors: FavoriteColor = FavoriteColor.BLUE + country: str = "Argentina" + address: str = None + + class Meta: + schema_doc = False + + +# Avro schema +UserAdvance.avro_schema() +'{ + "type": "record", + "name": "UserAdvance", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "long"}, + {"name": "pets", "type": {"type": "array", "items": "string", "name": "pet"}, "default": ["dog", "cat"]}, + {"name": "accounts", "type": {"type": "map", "values": "long", "name": "account"}, "default": {"key": 1}}, + {"name": "has_car", "type": "boolean", "default": false}, + {"name": "favorite_colors", "type": {"type": "enum", "name": "favorite_color", "symbols": ["BLUE", "YELLOW", "GREEN"]}, "default": "BLUE"}, + {"name": "country", "type": "string", "default": "Argentina"}, + {"name": "address", "type": ["null", "string"], "default": null} + ] +}' + +user = UserAdvance(name="bond", age=50) + +# pydantic +user.dict() +# >>> {'name': 
'bond', 'age': 50, 'pets': ['dog', 'cat'], 'accounts': {'key': 1}, 'has_car': False, 'favorite_colors': <FavoriteColor.BLUE: 'BLUE'>, 'country': 'Argentina', 'address': None} + +# pydantic +user.json() +# >>> '{"name": "bond", "age": 50, "pets": ["dog", "cat"], "accounts": {"key": 1}, "has_car": false, "favorite_colors": "BLUE", "country": "Argentina", "address": null}' + +# pydantic +user = UserAdvance(name="bond") + +# ValidationError: 1 validation error for UserAdvance +# age +# field required (type=value_error.missing) + + +# dataclasses-avroschema +event = user.serialize() +print(event) +# >>> b'\x08bondd\x04\x06dog\x06cat\x00\x02\x06key\x02\x00\x00\x00\x12Argentina\x00' + +UserAdvance.deserialize(data=event) +# >>> UserAdvance(name='bond', age=50, pets=['dog', 'cat'], accounts={'key': 1}, has_car=False, favorite_colors=<FavoriteColor.BLUE: 'BLUE'>, country='Argentina', address=None) +``` + +## Examples with python streaming drivers (kafka and redis) + +Under [examples](https://github.com/marcosschroh/dataclasses-avroschema/tree/master/examples) folder you can find 3 different kafka examples, one with [aiokafka](https://github.com/aio-libs/aiokafka) (`async`) showing the simplest use case when a `AvroModel` instance is serialized and sent through kafka, and the event is consumed. +The other two examples are `sync` using the [kafka-python](https://github.com/dpkp/kafka-python) driver, where the `avro-json` serialization and `schema evolution` (`FULL` compatibility) is shown. +Also, there are two `redis` examples using `redis streams` with [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) + +## Factory and fixtures + +[Dataclasses Avro Schema](https://github.com/marcosschroh/dataclasses-avroschema) also includes a `factory` feature, so you can generate `fast` python instances and use them, for example, to test your data streaming pipelines. Instances can be generated using the `fake` method. 
+ +```python +import typing +import dataclasses + +from dataclasses_avroschema import AvroModel + + +@dataclasses.dataclass +class Address(AvroModel): + "An Address" + street: str + street_number: int + + +@dataclasses.dataclass +class User(AvroModel): + "User with multiple Address" + name: str + age: int + addresses: typing.List[Address] + + +Address.fake() +# >>>> Address(street='PxZJILDRgbXyhWrrPWxQ', street_number=2067) + +User.fake() +# >>>> User(name='VGSBbOGfSGjkMDnefHIZ', age=8974, addresses=[Address(street='vNpPYgesiHUwwzGcmMiS', street_number=4790)]) +``` + +## Features + +* [x] Primitive types: int, long, double, float, boolean, string and null support +* [x] Complex types: enum, array, map, fixed, unions and records support +* [x] `typing.Annotated` supported +* [x] Logical Types: date, time (millis and micro), datetime (millis and micro), uuid support +* [X] Schema relations (oneToOne, oneToMany) +* [X] Recursive Schemas +* [X] Generate Avro Schemas from `faust.Record` +* [X] Instance serialization corresponding to the generated `avro schema` +* [X] Data deserialization. Return python dict or class instance +* [X] Generate json from python class instance +* [X] Case Schemas +* [X] Generate models from `avsc` files +* [X] Examples of integration with `kafka` drivers: [aiokafka](https://github.com/aio-libs/aiokafka), [kafka-python](https://github.com/dpkp/kafka-python) +* [X] Example of integration with `redis` drivers: [walrus](https://github.com/coleifer/walrus) and [redisgears-py](https://github.com/RedisGears/redisgears-py) +* [X] Factory instances +* [X] [Pydantic](https://pydantic-docs.helpmanual.io/) integration + +## Development + +[Poetry](https://python-poetry.org/docs/) is needed to install the dependencies and develop locally + +1. Install dependencies: `poetry install` +2. Code linting: `./scripts/format` +3. 
Run tests: `./scripts/test` + +For commit messages we use [commitizen](https://commitizen-tools.github.io/commitizen/) in order to standardize a way of committing rules + + +%prep +%autosetup -n dataclasses-avroschema-0.41.1 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-dataclasses-avroschema -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Mon Apr 10 2023 Python_Bot <Python_Bot@openeuler.org> - 0.41.1-1 +- Package Spec generated @@ -0,0 +1 @@ +c40c5817391be7ca1486b94475d1e6a6 dataclasses_avroschema-0.41.1.tar.gz |