From fbf25609478297796e712359d77ed534f0c9c48a Mon Sep 17 00:00:00 2001
From: CoprDistGit
Date: Mon, 15 May 2023 04:38:11 +0000
Subject: automatic import of python-dask-sql

---
 .gitignore           |   1 +
 python-dask-sql.spec | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sources              |   1 +
 3 files changed, 313 insertions(+)
 create mode 100644 python-dask-sql.spec
 create mode 100644 sources

diff --git a/.gitignore b/.gitignore
index e69de29..2bbfa20 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/dask_sql-2023.4.0.tar.gz
diff --git a/python-dask-sql.spec b/python-dask-sql.spec
new file mode 100644
index 0000000..7fcf21a
--- /dev/null
+++ b/python-dask-sql.spec
@@ -0,0 +1,311 @@
+%global _empty_manifest_terminate_build 0
+Name:		python-dask-sql
+Version:	2023.4.0
+Release:	1
+Summary:	SQL query layer for Dask
+License:	MIT
+URL:		https://github.com/dask-contrib/dask-sql/
+Source0:	https://mirrors.nju.edu.cn/pypi/web/packages/95/ba/82ec4a5f7e766f66c22b3a5d447a458fe09702a0e965a978b8cea422dff1/dask_sql-2023.4.0.tar.gz
+
+
+%description
+## Example
+For this example, we load some data from disk and query it with a SQL command from our Python code.
+Any pandas or Dask dataframe can be used as input, and ``dask-sql`` understands a wide range of formats (csv, parquet, json, ...) and locations (s3, hdfs, gcs, ...).
+```python
+import dask.dataframe as dd
+from dask_sql import Context
+# Create a context to hold the registered tables
+c = Context()
+# Load the data and register it in the context
+# This will give the table a name that we can use in queries
+df = dd.read_csv("...")
+c.create_table("my_data", df)
+# Now execute a SQL query. The result is again a Dask dataframe.
+result = c.sql("""
+    SELECT
+        my_data.name,
+        SUM(my_data.x)
+    FROM
+        my_data
+    GROUP BY
+        my_data.name
+""", return_futures=False)
+# Show the result
+print(result)
+```
+## Quickstart
+Have a look at the [documentation](https://dask-sql.readthedocs.io/en/latest/) or start the example notebook on [binder](https://mybinder.org/v2/gh/dask-contrib/dask-sql-binder/main?urlpath=lab).
+> `dask-sql` is currently under development and does not yet understand all SQL commands (but a large fraction of them).
+We are actively looking for feedback, improvements and contributors!
+## Installation
+`dask-sql` can be installed via `conda` (preferred) or `pip`, or set up in a development environment.
+### With `conda`
+Create a new conda environment or use an existing one:
+    conda create -n dask-sql
+    conda activate dask-sql
+Install the package from the `conda-forge` channel:
+    conda install dask-sql -c conda-forge
+### With `pip`
+You can install the package with
+    pip install dask-sql
+### For development
+If you want the newest (unreleased) `dask-sql` version, or if you plan to develop `dask-sql` itself, you can also install the package from source.
+    git clone https://github.com/dask-contrib/dask-sql.git
+Create a new conda environment and install the development dependencies:
+    conda env create -f continuous_integration/environment-3.9-dev.yaml
+It is not recommended to use `pip` instead of `conda` for the environment setup.
+After that, you can install the package in development mode
+    pip install -e ".[dev]"
+The Rust DataFusion bindings are built as part of the `pip install`.
+If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings:
+    python setup.py build install
+This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call
+    pre-commit install
+## Testing
+You can run the tests (after installation) with
+    pytest tests
+GPU-specific tests require additional dependencies specified in `continuous_integration/gpuci/environment.yaml`.
+These can be added to the development environment by running
+```
+conda env update -n dask-sql -f continuous_integration/gpuci/environment.yaml
+```
+The GPU-specific tests can then be run with
+```
+pytest tests -m gpu --rungpu
+```
+## SQL Server
+`dask-sql` comes with a small test implementation of a SQL server.
+Instead of reimplementing a full ODBC driver, we reuse the [presto wire protocol](https://github.com/prestodb/presto/wiki/HTTP-Protocol).
+It is, so far, only a starting point and is missing important concepts, such as
+authentication.
+You can test the SQL Presto server by running (after installation)
+    dask-sql-server
+or by using the prebuilt Docker image
+    docker run --rm -it -p 8080:8080 nbraun/dask-sql
+in one terminal. This will spin up a server on port 8080 (by default)
+that looks like a normal Presto database to any Presto client.
+You can test this, for example, with the default [presto client](https://prestosql.io/docs/current/installation/cli.html):
+    presto --server localhost:8080
+Now you can run simple SQL queries (as no data is loaded by default):
+    => SELECT 1 + 1;
+
+%package -n python3-dask-sql
+Summary:	SQL query layer for Dask
+Provides:	python-dask-sql
+BuildRequires:	python3-devel
+BuildRequires:	python3-setuptools
+BuildRequires:	python3-pip
+BuildRequires:	python3-cffi
+BuildRequires:	gcc
+BuildRequires:	gdb
+%description -n python3-dask-sql
+## Example
+For this example, we load some data from disk and query it with a SQL command from our Python code.
+Any pandas or Dask dataframe can be used as input, and ``dask-sql`` understands a wide range of formats (csv, parquet, json, ...) and locations (s3, hdfs, gcs, ...).
+```python
+import dask.dataframe as dd
+from dask_sql import Context
+# Create a context to hold the registered tables
+c = Context()
+# Load the data and register it in the context
+# This will give the table a name that we can use in queries
+df = dd.read_csv("...")
+c.create_table("my_data", df)
+# Now execute a SQL query. The result is again a Dask dataframe.
+result = c.sql("""
+    SELECT
+        my_data.name,
+        SUM(my_data.x)
+    FROM
+        my_data
+    GROUP BY
+        my_data.name
+""", return_futures=False)
+# Show the result
+print(result)
+```
+## Quickstart
+Have a look at the [documentation](https://dask-sql.readthedocs.io/en/latest/) or start the example notebook on [binder](https://mybinder.org/v2/gh/dask-contrib/dask-sql-binder/main?urlpath=lab).
+> `dask-sql` is currently under development and does not yet understand all SQL commands (but a large fraction of them).
+We are actively looking for feedback, improvements and contributors!
+## Installation
+`dask-sql` can be installed via `conda` (preferred) or `pip`, or set up in a development environment.
+### With `conda`
+Create a new conda environment or use an existing one:
+    conda create -n dask-sql
+    conda activate dask-sql
+Install the package from the `conda-forge` channel:
+    conda install dask-sql -c conda-forge
+### With `pip`
+You can install the package with
+    pip install dask-sql
+### For development
+If you want the newest (unreleased) `dask-sql` version, or if you plan to develop `dask-sql` itself, you can also install the package from source.
+    git clone https://github.com/dask-contrib/dask-sql.git
+Create a new conda environment and install the development dependencies:
+    conda env create -f continuous_integration/environment-3.9-dev.yaml
+It is not recommended to use `pip` instead of `conda` for the environment setup.
+After that, you can install the package in development mode
+    pip install -e ".[dev]"
+The Rust DataFusion bindings are built as part of the `pip install`.
+If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings:
+    python setup.py build install
+This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call
+    pre-commit install
+## Testing
+You can run the tests (after installation) with
+    pytest tests
+GPU-specific tests require additional dependencies specified in `continuous_integration/gpuci/environment.yaml`.
+These can be added to the development environment by running
+```
+conda env update -n dask-sql -f continuous_integration/gpuci/environment.yaml
+```
+The GPU-specific tests can then be run with
+```
+pytest tests -m gpu --rungpu
+```
+## SQL Server
+`dask-sql` comes with a small test implementation of a SQL server.
+Instead of reimplementing a full ODBC driver, we reuse the [presto wire protocol](https://github.com/prestodb/presto/wiki/HTTP-Protocol).
+It is, so far, only a starting point and is missing important concepts, such as
+authentication.
+You can test the SQL Presto server by running (after installation)
+    dask-sql-server
+or by using the prebuilt Docker image
+    docker run --rm -it -p 8080:8080 nbraun/dask-sql
+in one terminal. This will spin up a server on port 8080 (by default)
+that looks like a normal Presto database to any Presto client.
+You can test this, for example, with the default [presto client](https://prestosql.io/docs/current/installation/cli.html):
+    presto --server localhost:8080
+Now you can run simple SQL queries (as no data is loaded by default):
+    => SELECT 1 + 1;
+
+%package help
+Summary:	Development documents and examples for dask-sql
+Provides:	python3-dask-sql-doc
+%description help
+## Example
+For this example, we load some data from disk and query it with a SQL command from our Python code.
+Any pandas or Dask dataframe can be used as input, and ``dask-sql`` understands a wide range of formats (csv, parquet, json, ...) and locations (s3, hdfs, gcs, ...).
+```python
+import dask.dataframe as dd
+from dask_sql import Context
+# Create a context to hold the registered tables
+c = Context()
+# Load the data and register it in the context
+# This will give the table a name that we can use in queries
+df = dd.read_csv("...")
+c.create_table("my_data", df)
+# Now execute a SQL query. The result is again a Dask dataframe.
+result = c.sql("""
+    SELECT
+        my_data.name,
+        SUM(my_data.x)
+    FROM
+        my_data
+    GROUP BY
+        my_data.name
+""", return_futures=False)
+# Show the result
+print(result)
+```
+## Quickstart
+Have a look at the [documentation](https://dask-sql.readthedocs.io/en/latest/) or start the example notebook on [binder](https://mybinder.org/v2/gh/dask-contrib/dask-sql-binder/main?urlpath=lab).
+> `dask-sql` is currently under development and does not yet understand all SQL commands (but a large fraction of them).
+We are actively looking for feedback, improvements and contributors!
+## Installation
+`dask-sql` can be installed via `conda` (preferred) or `pip`, or set up in a development environment.
+### With `conda`
+Create a new conda environment or use an existing one:
+    conda create -n dask-sql
+    conda activate dask-sql
+Install the package from the `conda-forge` channel:
+    conda install dask-sql -c conda-forge
+### With `pip`
+You can install the package with
+    pip install dask-sql
+### For development
+If you want the newest (unreleased) `dask-sql` version, or if you plan to develop `dask-sql` itself, you can also install the package from source.
+    git clone https://github.com/dask-contrib/dask-sql.git
+Create a new conda environment and install the development dependencies:
+    conda env create -f continuous_integration/environment-3.9-dev.yaml
+It is not recommended to use `pip` instead of `conda` for the environment setup.
+After that, you can install the package in development mode
+    pip install -e ".[dev]"
+The Rust DataFusion bindings are built as part of the `pip install`.
+If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings:
+    python setup.py build install
+This repository uses [pre-commit](https://pre-commit.com/) hooks. To install them, call
+    pre-commit install
+## Testing
+You can run the tests (after installation) with
+    pytest tests
+GPU-specific tests require additional dependencies specified in `continuous_integration/gpuci/environment.yaml`.
+These can be added to the development environment by running
+```
+conda env update -n dask-sql -f continuous_integration/gpuci/environment.yaml
+```
+The GPU-specific tests can then be run with
+```
+pytest tests -m gpu --rungpu
+```
+## SQL Server
+`dask-sql` comes with a small test implementation of a SQL server.
+Instead of reimplementing a full ODBC driver, we reuse the [presto wire protocol](https://github.com/prestodb/presto/wiki/HTTP-Protocol).
+It is, so far, only a starting point and is missing important concepts, such as
+authentication.
+You can test the SQL Presto server by running (after installation)
+    dask-sql-server
+or by using the prebuilt Docker image
+    docker run --rm -it -p 8080:8080 nbraun/dask-sql
+in one terminal. This will spin up a server on port 8080 (by default)
+that looks like a normal Presto database to any Presto client.
+You can test this, for example, with the default [presto client](https://prestosql.io/docs/current/installation/cli.html):
+    presto --server localhost:8080
+Now you can run simple SQL queries (as no data is loaded by default):
+    => SELECT 1 + 1;
+
+%prep
+%autosetup -n dask-sql-2023.4.0
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
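+
+# The description above runs the test suite with "pytest tests"; a possible %%check
+# stanza along those lines is sketched here, commented out. It assumes pytest and the
+# test dependencies would be added as BuildRequires and that GPU-only tests (the "gpu"
+# marker mentioned above) are skipped, so treat it as an optional starting point rather
+# than a verified build step.
+#%%check
+#%%{__python3} -m pytest tests -m "not gpu"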
+ +%files -n python3-dask-sql -f filelist.lst +%dir %{python3_sitearch}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Mon May 15 2023 Python_Bot - 2023.4.0-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..c694923 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +cf78eed678e2f6e68620441cfeb77b03 dask_sql-2023.4.0.tar.gz -- cgit v1.2.3