From 30da04097050df265b0c584b2a70ceff3f1d96e9 Mon Sep 17 00:00:00 2001
From: CoprDistGit
Date: Fri, 5 May 2023 04:50:37 +0000
Subject: automatic import of python-sparkautomapper
---
 python-sparkautomapper.spec | 1055 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1055 insertions(+)
 create mode 100644 python-sparkautomapper.spec

diff --git a/python-sparkautomapper.spec b/python-sparkautomapper.spec
new file mode 100644
index 0000000..6eb0859
--- /dev/null
+++ b/python-sparkautomapper.spec
@@ -0,0 +1,1055 @@
+%global _empty_manifest_terminate_build 0
+Name: python-sparkautomapper
+Version: 1.1.1
+Release: 1
+Summary: AutoMapper for Spark
+License: Apache Software License
+URL: https://github.com/imranq2/SparkAutoMapper
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/de/96/460e3e8fc2038e77d2620026cb33fc81beb1e921752aa01909917e4d1365/sparkautomapper-1.1.1.tar.gz
+BuildArch: noarch
+
+Requires: python3-pyspark
+Requires: python3-logger
+Requires: python3-sparkdataframecomparer
+Requires: python3-deprecated
+Requires: python3-numpy
+
+%description
+[![Build and Test](https://github.com/imranq2/SparkAutoMapper/actions/workflows/build_and_test.yml/badge.svg)](https://github.com/imranq2/SparkAutoMapper/actions/workflows/build_and_test.yml)
+
+[![Upload Python Package](https://github.com/imranq2/SparkAutoMapper/actions/workflows/python-publish.yml/badge.svg)](https://github.com/imranq2/SparkAutoMapper/actions/workflows/python-publish.yml)
+
+[![Known Vulnerabilities](https://snyk.io/test/github/imranq2/SparkAutoMapper/badge.svg?targetFile=requirements.txt)](https://snyk.io/test/github/imranq2/SparkAutoMapper?targetFile=requirements.txt)
+
+# SparkAutoMapper
+A fluent API to map data from one view to another in Spark.
+
+SparkAutoMapper uses native Spark functions underneath, so it is just as fast as hand-writing the transformations.
+
+Since this is just Python, you can use any Python editor. And since everything is annotated with Python type hints, most editors will auto-complete and warn you when you do something wrong.
+
+## Usage
+```shell script
+pip install sparkautomapper
+```
+
+## Documentation
+https://icanbwell.github.io/SparkAutoMapper/
+
+## SparkAutoMapper input and output
+You can either pass a dataframe to SparkAutoMapper or specify the name of a Spark view to read from.
+
+You can receive the result as a dataframe or (optionally) pass in the name of a view where you want the result.
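+
+For example, the two styles can be combined end to end. The following is a minimal sketch stitched together from the examples below; it assumes an active `SparkSession` named `spark`:
+```python
+from pyspark.sql import DataFrame
+from spark_auto_mapper.automappers.automapper import AutoMapper
+
+# Register a source view with a couple of sample rows.
+spark.createDataFrame(
+    [(1, 'Qureshi', 'Imran'), (2, 'Vidal', 'Michael')],
+    ['member_id', 'last_name', 'first_name']
+).createOrReplaceTempView("patients")
+
+# Seed the destination view with just the key column.
+df: DataFrame = spark.table("patients").select("member_id")
+df.createOrReplaceTempView("members")
+
+# Read from the patients view and write the mapped columns to the members view.
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1="[last_name]"
+)
+
+result_df: DataFrame = mapper.transform(df=df)
+```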
+
+## Dynamic Typing Examples
+#### Set a column in destination to a text value (read from a passed-in data frame and return the result in a new data frame)
+Set a column in destination to a text value
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+
+mapper = AutoMapper(
+    keys=["member_id"]
+).columns(
+    dst1="hello"
+)
+```
+
+#### Set a column in destination to a text value (read from a Spark view and put result in another Spark view)
+Set a column in destination to a text value
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1="hello"
+)
+```
+
+#### Set a column in destination to an int value
+Set a column in destination to an int value
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1=1050
+)
+```
+
+#### Copy a column (src1) from source_view to destination view column (dst1)
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1=A.column("src1")
+)
+```
+Or you can use the shortcut for specifying a column (wrap the column name in `[]`):
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1="[src1]"
+)
+```
+
+#### Convert data type for a column (or string literal)
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    birthDate=A.date(A.column("date_of_birth"))
+)
+```
+
+#### Use a Spark SQL expression (any valid Spark SQL expression can be used)
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    gender=A.expression(
+        """
+        CASE
+            WHEN `Member Sex` = 'F' THEN 'female'
+            WHEN `Member Sex` = 'M' THEN 'male'
+            ELSE 'other'
+        END
+        """
+    )
+)
+```
+
+#### Specify multiple transformations
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst1="[src1]",
+    birthDate=A.date("[date_of_birth]"),
+    gender=A.expression(
+        """
+        CASE
+            WHEN `Member Sex` = 'F' THEN 'female'
+            WHEN `Member Sex` = 'M' THEN 'male'
+            ELSE 'other'
+        END
+        """
+    )
+)
+```
+
+#### Use variables or parameters
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+def mapping(parameters: dict):
+    mapper = AutoMapper(
+        view="members",
+        source_view="patients",
+        keys=["member_id"]
+    ).columns(
+        dst1=A.column(parameters["my_column_name"])
+    )
+    return mapper
+```
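+
+The mapping function can then be driven with a plain dictionary. A short sketch (the key `my_column_name` is simply the one used above; any source column name works as the value):
+```python
+# Build the mapper with concrete parameter values.
+mapper = mapping(parameters={"my_column_name": "src1"})
+```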
+
+#### Use conditional logic
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+def mapping(parameters: dict):
+    mapper = AutoMapper(
+        view="members",
+        source_view="patients",
+        keys=["member_id"]
+    ).columns(
+        dst1=A.column(parameters["my_column_name"])
+    )
+
+    if parameters["customer"] == "Microsoft":
+        mapper = mapper.columns(
+            important_customer=1,
+            customer_name=parameters["customer"]
+        )
+    return mapper
+```
+
+#### Using nested array columns
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).withColumn(
+    dst2=A.list(
+        [
+            "address1",
+            "address2"
+        ]
+    )
+)
+```
+
+#### Using nested struct columns
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst2=A.complex(
+        use="usual",
+        family="imran"
+    )
+)
+```
+
+#### Using lists of structs
+```python
+from spark_auto_mapper.automappers.automapper import AutoMapper
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+mapper = AutoMapper(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).columns(
+    dst2=A.list(
+        [
+            A.complex(
+                use="usual",
+                family="imran"
+            ),
+            A.complex(
+                use="usual",
+                family="[last_name]"
+            )
+        ]
+    )
+)
+```
+
+## Executing the AutoMapper
+```python
+from pyspark.sql import DataFrame
+
+# Assumes an active SparkSession named `spark` and a `mapper` built as in the
+# examples above.
+spark.createDataFrame(
+    [
+        (1, 'Qureshi', 'Imran'),
+        (2, 'Vidal', 'Michael'),
+    ],
+    ['member_id', 'last_name', 'first_name']
+).createOrReplaceTempView("patients")
+
+source_df: DataFrame = spark.table("patients")
+
+df = source_df.select("member_id")
+df.createOrReplaceTempView("members")
+
+result_df: DataFrame = mapper.transform(df=df)
+```
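+
+The result is an ordinary Spark dataframe, so the usual inspection APIs apply. A quick sanity check (assuming the mapper populated `dst1` as in the earlier examples):
+```python
+result_df.printSchema()
+result_df.select("member_id", "dst1").show()
+```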
+
+## Statically Typed Examples
+To improve the auto-complete and syntax checking even more, you can define complex types:
+
+Define a custom data type:
+```python
+from spark_auto_mapper.type_definitions.automapper_defined_types import AutoMapperTextInputType
+from spark_auto_mapper.helpers.automapper_value_parser import AutoMapperValueParser
+from spark_auto_mapper.data_types.date import AutoMapperDateDataType
+from spark_auto_mapper.data_types.list import AutoMapperList
+from spark_auto_mapper_fhir.fhir_types.automapper_fhir_data_type_complex_base import AutoMapperFhirDataTypeComplexBase
+
+
+class AutoMapperFhirDataTypePatient(AutoMapperFhirDataTypeComplexBase):
+    # noinspection PyPep8Naming
+    def __init__(self,
+                 id_: AutoMapperTextInputType,
+                 birthDate: AutoMapperDateDataType,
+                 name: AutoMapperList,
+                 gender: AutoMapperTextInputType
+                 ) -> None:
+        super().__init__()
+        self.value = dict(
+            id=AutoMapperValueParser.parse_value(id_),
+            birthDate=AutoMapperValueParser.parse_value(birthDate),
+            name=AutoMapperValueParser.parse_value(name),
+            gender=AutoMapperValueParser.parse_value(gender)
+        )
+```
+
+Now you get auto-complete and syntax checking:
+```python
+from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A
+
+# AutoMapperFhir and the F resource helpers ship with the companion
+# spark_auto_mapper_fhir package.
+mapper = AutoMapperFhir(
+    view="members",
+    source_view="patients",
+    keys=["member_id"]
+).withResource(
+    resource=F.patient(
+        id_=A.column("a.member_id"),
+        birthDate=A.date(
+            A.column("date_of_birth")
+        ),
+        name=A.list(
+            F.human_name(
+                use="usual",
+                family=A.column("last_name")
+            )
+        ),
+        gender="female"
+    )
+)
+```
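+
+To see what that checking buys, here is an illustrative call that a type checker or IDE can now flag (the exact diagnostic depends on your tooling):
+```python
+patient = AutoMapperFhirDataTypePatient(
+    id_="12345",
+    birthDate=19840101,  # flagged: an int is not an AutoMapperDateDataType
+    name=A.list([]),
+    gender="female"
+)
+```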
+
+# Publishing a new package
+1. Edit VERSION to increment the version
+2. Create a new release
+3. The GitHub Action should automatically kick in and publish the package
+4. You can see the status in the Actions tab
+
+
+%package -n python3-sparkautomapper
+Summary: AutoMapper for Spark
+Provides: python-sparkautomapper
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-sparkautomapper
+SparkAutoMapper is a fluent API to map data from one view to another in Spark. It uses native Spark functions underneath, so mapped transformations run as fast as hand-written ones. See the python-sparkautomapper source package description for the full README with usage examples.
+
+
+%package help
+Summary: Development documents and examples for sparkautomapper
+Provides: python3-sparkautomapper-doc
+%description help
+Development documents and examples for sparkautomapper. See the python-sparkautomapper source package description for the full README with usage examples.
+
+
+%prep
+%autosetup -n sparkautomapper-1.1.1
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-sparkautomapper -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Fri May 05 2023 Python_Bot - 1.1.1-1
+- Package Spec generated
--
cgit v1.2.3