-rw-r--r--   .gitignore                |   1 +
-rw-r--r--   python-pysparkling.spec   | 144 +
-rw-r--r--   sources                   |   1 +
3 files changed, 146 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/pysparkling-0.6.2.tar.gz
diff --git a/python-pysparkling.spec b/python-pysparkling.spec
new file mode 100644
index 0000000..376c403
--- /dev/null
+++ b/python-pysparkling.spec
@@ -0,0 +1,144 @@
+%global _empty_manifest_terminate_build 0
+Name: python-pysparkling
+Version: 0.6.2
+Release: 1
+Summary: Pure Python implementation of the Spark RDD interface.
+License: MIT
+URL: https://github.com/svenkreiss/pysparkling
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/96/6f/d66fcd96ed26f7526248ba11a4d09a0cd9d2d164f93ff709e3f72a8f425e/pysparkling-0.6.2.tar.gz
+BuildArch: noarch
+
+
+%description
+**Pysparkling** provides a faster, more responsive way to develop programs
+for PySpark. It enables code intended for Spark applications to execute
+entirely in Python, without incurring the overhead of initializing and
+passing data through the JVM and Hadoop. The focus is on having a lightweight
+and fast implementation for small datasets at the expense of some data
+resilience features and some parallel processing features.
+**How does it work?** To switch execution of a script from PySpark to pysparkling,
+have the code initialize a pysparkling Context instead of a SparkContext, and
+use the pysparkling Context to set up your RDDs. The beauty is you don't have
+to change a single line of code after the Context initialization, because
+pysparkling's API is (almost) exactly the same as PySpark's. Since it's so easy
+to switch between PySpark and pysparkling, you can choose the right tool for your
+use case.
+**When would I use it?** Say you are writing a Spark application because you
+need robust computation on huge datasets, but you also want the same application
+to provide fast answers on a small dataset. You're finding Spark is not responsive
+enough for your needs, but you don't want to rewrite an entire separate application
+for the *small-answers-fast* problem. You'd rather reuse your Spark code but somehow
+get it to run fast. Pysparkling bypasses the stuff that causes Spark's long startup
+times and less responsive feel.
+Here are a few areas where pysparkling excels:
+* Small to medium-scale exploratory data analysis
+* Application prototyping
+* Low-latency web deployments
+* Unit tests
+
+%package -n python3-pysparkling
+Summary: Pure Python implementation of the Spark RDD interface.
+Provides: python-pysparkling
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-pysparkling
+**Pysparkling** provides a faster, more responsive way to develop programs
+for PySpark. It enables code intended for Spark applications to execute
+entirely in Python, without incurring the overhead of initializing and
+passing data through the JVM and Hadoop. The focus is on having a lightweight
+and fast implementation for small datasets at the expense of some data
+resilience features and some parallel processing features.
+**How does it work?** To switch execution of a script from PySpark to pysparkling,
+have the code initialize a pysparkling Context instead of a SparkContext, and
+use the pysparkling Context to set up your RDDs. The beauty is you don't have
+to change a single line of code after the Context initialization, because
+pysparkling's API is (almost) exactly the same as PySpark's. Since it's so easy
+to switch between PySpark and pysparkling, you can choose the right tool for your
+use case.
+**When would I use it?** Say you are writing a Spark application because you
+need robust computation on huge datasets, but you also want the same application
+to provide fast answers on a small dataset. You're finding Spark is not responsive
+enough for your needs, but you don't want to rewrite an entire separate application
+for the *small-answers-fast* problem. You'd rather reuse your Spark code but somehow
+get it to run fast. Pysparkling bypasses the stuff that causes Spark's long startup
+times and less responsive feel.
+Here are a few areas where pysparkling excels:
+* Small to medium-scale exploratory data analysis
+* Application prototyping
+* Low-latency web deployments
+* Unit tests
+
+%package help
+Summary: Development documents and examples for pysparkling
+Provides: python3-pysparkling-doc
+%description help
+**Pysparkling** provides a faster, more responsive way to develop programs
+for PySpark. It enables code intended for Spark applications to execute
+entirely in Python, without incurring the overhead of initializing and
+passing data through the JVM and Hadoop. The focus is on having a lightweight
+and fast implementation for small datasets at the expense of some data
+resilience features and some parallel processing features.
+**How does it work?** To switch execution of a script from PySpark to pysparkling,
+have the code initialize a pysparkling Context instead of a SparkContext, and
+use the pysparkling Context to set up your RDDs. The beauty is you don't have
+to change a single line of code after the Context initialization, because
+pysparkling's API is (almost) exactly the same as PySpark's. Since it's so easy
+to switch between PySpark and pysparkling, you can choose the right tool for your
+use case.
+**When would I use it?** Say you are writing a Spark application because you
+need robust computation on huge datasets, but you also want the same application
+to provide fast answers on a small dataset. You're finding Spark is not responsive
+enough for your needs, but you don't want to rewrite an entire separate application
+for the *small-answers-fast* problem. You'd rather reuse your Spark code but somehow
+get it to run fast. Pysparkling bypasses the stuff that causes Spark's long startup
+times and less responsive feel.
+Here are a few areas where pysparkling excels:
+* Small to medium-scale exploratory data analysis
+* Application prototyping
+* Low-latency web deployments
+* Unit tests
+
+%prep
+%autosetup -n pysparkling-0.6.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-pysparkling -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Apr 11 2023 Python_Bot <Python_Bot@openeuler.org> - 0.6.2-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+331f1ac19913dd053e494a50cfb3c6a3 pysparkling-0.6.2.tar.gz
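
The sources file above pairs an MD5 digest with the tarball name; the build
tooling uses it to validate the downloaded Source0 archive. For illustration
only, a minimal Python check of that digest, assuming the tarball has been
fetched into the current directory (the local path is an assumption, not part
of this commit):

    import hashlib

    # Recompute the MD5 recorded in the 'sources' file and compare.
    # "pysparkling-0.6.2.tar.gz" is assumed to sit in the working directory.
    with open("pysparkling-0.6.2.tar.gz", "rb") as f:
        digest = hashlib.md5(f.read()).hexdigest()
    print(digest == "331f1ac19913dd053e494a50cfb3c6a3")  # expect: True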
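
The %description sections state that switching from PySpark to pysparkling
only means creating a pysparkling Context where a SparkContext would have
been created. A minimal sketch of that swap, assuming the packaged
pysparkling 0.6.2 is installed (the sample data is purely illustrative):

    # A pysparkling Context stands in for pyspark.SparkContext; the RDD
    # calls after this point are unchanged PySpark-style code.
    from pysparkling import Context

    sc = Context()                      # instead of: sc = SparkContext()
    rdd = sc.parallelize([1, 2, 3, 4])  # illustrative sample data
    print(rdd.map(lambda x: x * x).collect())  # -> [1, 4, 9, 16]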