diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | python-mse.spec | 382 | ||||
| -rw-r--r-- | sources | 1 |
3 files changed, 384 insertions, 0 deletions
@@ -0,0 +1 @@ +/mse-0.1.4.tar.gz diff --git a/python-mse.spec b/python-mse.spec new file mode 100644 index 0000000..2e6701b --- /dev/null +++ b/python-mse.spec @@ -0,0 +1,382 @@ +%global _empty_manifest_terminate_build 0 +Name: python-mse +Version: 0.1.4 +Release: 1 +Summary: Make Structs Easy (MSE) +License: Apache license 2.0 +URL: https://github.com/fqaiser94/mse +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/66/14/eebbed44d2c1251d932d4986f60511eadd43284855652437387d98481f6b/mse-0.1.4.tar.gz +BuildArch: noarch + +Requires: python3-pyspark + +%description +This library adds `withField`, `withFieldRenamed`, and `dropFields` methods to the Column class allowing users to easily add, rename, and drop fields inside StructType columns. +The signature and behaviour of these methods are intended to be similar to their Dataset equivalents, namely the `withColumn`, `withColumnRenamed`, and `drop` methods. + +The methods themselves are backed by efficient Catalyst Expressions and as a result, should provide better performance than equivalent UDFs. +While this library "monkey patches" the methods onto the Column class, +there is an ongoing effort to add these methods natively to the Column class in the Apache Spark SQL project. +You can follow along with the progress of this initiative in [SPARK-22231](https://issues.apache.org/jira/browse/SPARK-22231). + +If you find this project useful, please consider supporting it by giving a star! + +# Supported Spark versions + +MSE should work without any further requirements on Spark/PySpark 2.4.x. +The library is available for Python 3.x. + +# Installation + +Stable releases of MSE are published to PyPI. +You will also need to provide your PySpark application/s with the path to the MSE jar which you can get from [here](https://search.maven.org/artifact/com.github.fqaiser94/mse_2.11). 
+For example: + +```bash +pip install mse +curl https://repo1.maven.org/maven2/com/github/fqaiser94/mse_2.11/0.2.4/mse_2.11-0.2.4.jar --output mse.jar +pyspark --jars mse.jar +``` + +If you get errors like `TypeError: 'JavaPackage' object is not callable`, this usually indicates that you haven't +provided PySpark with the correct path to the MSE jar. + +# Usage +To bring in to scope the (implicit) Column methods in Python, use: + +```python3 +from mse import * +``` + +You can now use these methods to manipulate fields in a StructType column: + +```python3 +from pyspark.sql import * +from pyspark.sql.functions import * +from pyspark.sql.types import * +from mse import * + +# Generate some example data +structLevel1 = spark.createDataFrame( + sc.parallelize([Row(Row(1, None, 3))]), + StructType([ + StructField("a", StructType([ + StructField("a", IntegerType()), + StructField("b", IntegerType()), + StructField("c", IntegerType())]))])).cache() + +structLevel1.show() +# +-------+ +# | a| +# +-------+ +# |[1,, 3]| +# +-------+ + +structLevel1.printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- b: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# add new field to top level struct +structLevel1.withColumn("a", col("a").withField("d", lit(4))).show() +# +----------+ +# | a| +# +----------+ +# |[1,, 3, 4]| +# +----------+ + +# replace field in top level struct +structLevel1.withColumn("a", col("a").withField("b", lit(2))).show() +# +---------+ +# | a| +# +---------+ +# |[1, 2, 3]| +# +---------+ + +# rename field in top level struct +structLevel1.withColumn("a", col("a").withFieldRenamed("b", "z")).printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- z: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# drop field in top level struct +structLevel1.withColumn("a", col("a").dropFields("b")).show() +# +------+ +# | a| +# +------+ +# |[1, 
3]| +# +------+ +``` + +For more complicated examples, see the GitHub page. + + + +%package -n python3-mse +Summary: Make Structs Easy (MSE) +Provides: python-mse +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-mse +This library adds `withField`, `withFieldRenamed`, and `dropFields` methods to the Column class allowing users to easily add, rename, and drop fields inside StructType columns. +The signature and behaviour of these methods are intended to be similar to their Dataset equivalents, namely the `withColumn`, `withColumnRenamed`, and `drop` methods. + +The methods themselves are backed by efficient Catalyst Expressions and as a result, should provide better performance than equivalent UDFs. +While this library "monkey patches" the methods onto the Column class, +there is an ongoing effort to add these methods natively to the Column class in the Apache Spark SQL project. +You can follow along with the progress of this initiative in [SPARK-22231](https://issues.apache.org/jira/browse/SPARK-22231). + +If you find this project useful, please consider supporting it by giving a star! + +# Supported Spark versions + +MSE should work without any further requirements on Spark/PySpark 2.4.x. +The library is available for Python 3.x. + +# Installation + +Stable releases of MSE are published to PyPI. +You will also need to provide your PySpark application/s with the path to the MSE jar which you can get from [here](https://search.maven.org/artifact/com.github.fqaiser94/mse_2.11). +For example: + +```bash +pip install mse +curl https://repo1.maven.org/maven2/com/github/fqaiser94/mse_2.11/0.2.4/mse_2.11-0.2.4.jar --output mse.jar +pyspark --jars mse.jar +``` + +If you get errors like `TypeError: 'JavaPackage' object is not callable`, this usually indicates that you haven't +provided PySpark with the correct path to the MSE jar. 
+ +# Usage +To bring in to scope the (implicit) Column methods in Python, use: + +```python3 +from mse import * +``` + +You can now use these methods to manipulate fields in a StructType column: + +```python3 +from pyspark.sql import * +from pyspark.sql.functions import * +from pyspark.sql.types import * +from mse import * + +# Generate some example data +structLevel1 = spark.createDataFrame( + sc.parallelize([Row(Row(1, None, 3))]), + StructType([ + StructField("a", StructType([ + StructField("a", IntegerType()), + StructField("b", IntegerType()), + StructField("c", IntegerType())]))])).cache() + +structLevel1.show() +# +-------+ +# | a| +# +-------+ +# |[1,, 3]| +# +-------+ + +structLevel1.printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- b: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# add new field to top level struct +structLevel1.withColumn("a", col("a").withField("d", lit(4))).show() +# +----------+ +# | a| +# +----------+ +# |[1,, 3, 4]| +# +----------+ + +# replace field in top level struct +structLevel1.withColumn("a", col("a").withField("b", lit(2))).show() +# +---------+ +# | a| +# +---------+ +# |[1, 2, 3]| +# +---------+ + +# rename field in top level struct +structLevel1.withColumn("a", col("a").withFieldRenamed("b", "z")).printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- z: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# drop field in top level struct +structLevel1.withColumn("a", col("a").dropFields("b")).show() +# +------+ +# | a| +# +------+ +# |[1, 3]| +# +------+ +``` + +For more complicated examples, see the GitHub page. 
+ + + +%package help +Summary: Development documents and examples for mse +Provides: python3-mse-doc +%description help +This library adds `withField`, `withFieldRenamed`, and `dropFields` methods to the Column class allowing users to easily add, rename, and drop fields inside StructType columns. +The signature and behaviour of these methods are intended to be similar to their Dataset equivalents, namely the `withColumn`, `withColumnRenamed`, and `drop` methods. + +The methods themselves are backed by efficient Catalyst Expressions and as a result, should provide better performance than equivalent UDFs. +While this library "monkey patches" the methods onto the Column class, +there is an ongoing effort to add these methods natively to the Column class in the Apache Spark SQL project. +You can follow along with the progress of this initiative in [SPARK-22231](https://issues.apache.org/jira/browse/SPARK-22231). + +If you find this project useful, please consider supporting it by giving a star! + +# Supported Spark versions + +MSE should work without any further requirements on Spark/PySpark 2.4.x. +The library is available for Python 3.x. + +# Installation + +Stable releases of MSE are published to PyPI. +You will also need to provide your PySpark application/s with the path to the MSE jar which you can get from [here](https://search.maven.org/artifact/com.github.fqaiser94/mse_2.11). +For example: + +```bash +pip install mse +curl https://repo1.maven.org/maven2/com/github/fqaiser94/mse_2.11/0.2.4/mse_2.11-0.2.4.jar --output mse.jar +pyspark --jars mse.jar +``` + +If you get errors like `TypeError: 'JavaPackage' object is not callable`, this usually indicates that you haven't +provided PySpark with the correct path to the MSE jar. 
+ +# Usage +To bring in to scope the (implicit) Column methods in Python, use: + +```python3 +from mse import * +``` + +You can now use these methods to manipulate fields in a StructType column: + +```python3 +from pyspark.sql import * +from pyspark.sql.functions import * +from pyspark.sql.types import * +from mse import * + +# Generate some example data +structLevel1 = spark.createDataFrame( + sc.parallelize([Row(Row(1, None, 3))]), + StructType([ + StructField("a", StructType([ + StructField("a", IntegerType()), + StructField("b", IntegerType()), + StructField("c", IntegerType())]))])).cache() + +structLevel1.show() +# +-------+ +# | a| +# +-------+ +# |[1,, 3]| +# +-------+ + +structLevel1.printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- b: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# add new field to top level struct +structLevel1.withColumn("a", col("a").withField("d", lit(4))).show() +# +----------+ +# | a| +# +----------+ +# |[1,, 3, 4]| +# +----------+ + +# replace field in top level struct +structLevel1.withColumn("a", col("a").withField("b", lit(2))).show() +# +---------+ +# | a| +# +---------+ +# |[1, 2, 3]| +# +---------+ + +# rename field in top level struct +structLevel1.withColumn("a", col("a").withFieldRenamed("b", "z")).printSchema() +# root +# |-- a: struct (nullable = true) +# | |-- a: integer (nullable = true) +# | |-- z: integer (nullable = true) +# | |-- c: integer (nullable = true) + +# drop field in top level struct +structLevel1.withColumn("a", col("a").dropFields("b")).show() +# +------+ +# | a| +# +------+ +# |[1, 3]| +# +------+ +``` + +For more complicated examples, see the GitHub page. 
+ + + +%prep +%autosetup -n mse-0.1.4 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-mse -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.4-1 +- Package Spec generated @@ -0,0 +1 @@ +18c031c48464ea65863266dec92a6dbc mse-0.1.4.tar.gz |
