summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-04-11 19:30:03 +0000
committerCoprDistGit <infra@openeuler.org>2023-04-11 19:30:03 +0000
commit5f62b59b5c764369f54eb48878d0835ea55deb21 (patch)
treef149d394e782ca48afeb970b5da0abdf2e7bf09e
parent98fe870cffbd3aa010d61071d0a9781698f34ece (diff)
automatic import of python-parquet-tools
-rw-r--r--.gitignore1
-rw-r--r--python-parquet-tools.spec481
-rw-r--r--sources1
3 files changed, 483 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..3725707 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/parquet_tools-0.2.13.tar.gz
diff --git a/python-parquet-tools.spec b/python-parquet-tools.spec
new file mode 100644
index 0000000..83eeef5
--- /dev/null
+++ b/python-parquet-tools.spec
@@ -0,0 +1,481 @@
+# Spec preamble for the parquet-tools Python package, version 0.2.13.
+# NOTE(review): the next line sets _empty_manifest_terminate_build to 0; presumably this
+# keeps the build from aborting on an empty generated manifest - confirm against rpm docs.
+%global _empty_manifest_terminate_build 0
+Name: python-parquet-tools
+Version: 0.2.13
+Release: 1
+Summary: Easy install parquet-tools
+License: MIT
+URL: https://github.com/ktrueda/parquet-tools
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/98/b8/69e0b7adb2bc9e8c807bce6e1eb5294e24f85986780c4cfb0b36b4492b51/parquet_tools-0.2.13.tar.gz
+# The package is architecture independent.
+BuildArch: noarch
+
+# Runtime dependencies, declared on the main package.
+# NOTE(review): no files section for the main package is visible in this spec, so confirm
+# that these Requires actually end up on the python3-parquet-tools subpackage.
+Requires: python3-boto3
+Requires: python3-colorama
+Requires: python3-halo
+Requires: python3-pandas
+Requires: python3-pyarrow
+Requires: python3-tabulate
+Requires: python3-thrift
+
+%description
+# parquet-tools
+
+![Run Unittest](https://github.com/ktrueda/parquet-tools/workflows/Run%20Unittest/badge.svg)
+![Run CLI test](https://github.com/ktrueda/parquet-tools/workflows/Run%20CLI%20test/badge.svg)
+
+This is a pip-installable [parquet-tools](https://github.com/apache/parquet-mr).
+In other words, parquet-tools is a CLI tool built on [Apache Arrow](https://github.com/apache/arrow).
+You can show parquet file content/schema on local disk or on Amazon S3.
+It is incompatible with the original parquet-tools.
+
+## Features
+
+- Read Parquet data (local file or file on S3)
+- Read Parquet metadata/schema (local file or file on S3)
+
+## Installation
+
+```bash
+$ pip install parquet-tools
+```
+
+## Usage
+
+```bash
+$ parquet-tools --help
+usage: parquet-tools [-h] {show,csv,inspect} ...
+
+parquet CLI tools
+
+positional arguments:
+ {show,csv,inspect}
+ show Show human readble format. see `show -h`
+ csv Cat csv style. see `csv -h`
+ inspect Inspect parquet file. see `inspect -h`
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+## Usage Examples
+
+#### Show local parquet file
+
+```bash
+$ parquet-tools show test.parquet
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+#### Show parquet file on S3
+
+```bash
+$ parquet-tools show s3://bucket-name/prefix/*
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+
+#### Inspect parquet file schema
+
+```bash
+$ parquet-tools inspect /path/to/parquet
+```
+
+<details>
+
+<summary>Inspect output</summary>
+
+```
+############ file meta data ############
+created_by: parquet-cpp version 1.5.1-SNAPSHOT
+num_columns: 3
+num_rows: 3
+num_row_groups: 1
+format_version: 1.0
+serialized_size: 2226
+
+
+############ Columns ############
+one
+two
+three
+
+############ Column(one) ############
+name: one
+path: one
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: DOUBLE
+logical_type: None
+converted_type (legacy): NONE
+
+############ Column(two) ############
+name: two
+path: two
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BYTE_ARRAY
+logical_type: String
+converted_type (legacy): UTF8
+
+############ Column(three) ############
+name: three
+path: three
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BOOLEAN
+logical_type: None
+converted_type (legacy): NONE
+```
+</details>
+
+#### Cat CSV parquet and transform [csvq](https://github.com/mithrandie/csvq)
+
+```bash
+$ parquet-tools csv s3://bucket-name/test.parquet |csvq "select one, three where three"
++-------+-------+
+| one | three |
++-------+-------+
+| -1.0 | True |
+| 2.5 | True |
++-------+-------+
+```
+
+
+
+%package -n python3-parquet-tools
+Summary: Easy install parquet-tools
+Provides: python-parquet-tools
+# Build-time dependencies.
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+# Runtime dependencies are declared on this subpackage because it is the one that ships
+# the installed files; Requires from the main preamble are not inherited by subpackages.
+Requires: python3-boto3
+Requires: python3-colorama
+Requires: python3-halo
+Requires: python3-pandas
+Requires: python3-pyarrow
+Requires: python3-tabulate
+Requires: python3-thrift
+%description -n python3-parquet-tools
+# parquet-tools
+
+![Run Unittest](https://github.com/ktrueda/parquet-tools/workflows/Run%20Unittest/badge.svg)
+![Run CLI test](https://github.com/ktrueda/parquet-tools/workflows/Run%20CLI%20test/badge.svg)
+
+This is a pip-installable [parquet-tools](https://github.com/apache/parquet-mr).
+In other words, parquet-tools is a CLI tool built on [Apache Arrow](https://github.com/apache/arrow).
+You can show parquet file content/schema on local disk or on Amazon S3.
+It is incompatible with the original parquet-tools.
+
+## Features
+
+- Read Parquet data (local file or file on S3)
+- Read Parquet metadata/schema (local file or file on S3)
+
+## Installation
+
+```bash
+$ pip install parquet-tools
+```
+
+## Usage
+
+```bash
+$ parquet-tools --help
+usage: parquet-tools [-h] {show,csv,inspect} ...
+
+parquet CLI tools
+
+positional arguments:
+ {show,csv,inspect}
+ show Show human readble format. see `show -h`
+ csv Cat csv style. see `csv -h`
+ inspect Inspect parquet file. see `inspect -h`
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+## Usage Examples
+
+#### Show local parquet file
+
+```bash
+$ parquet-tools show test.parquet
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+#### Show parquet file on S3
+
+```bash
+$ parquet-tools show s3://bucket-name/prefix/*
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+
+#### Inspect parquet file schema
+
+```bash
+$ parquet-tools inspect /path/to/parquet
+```
+
+<details>
+
+<summary>Inspect output</summary>
+
+```
+############ file meta data ############
+created_by: parquet-cpp version 1.5.1-SNAPSHOT
+num_columns: 3
+num_rows: 3
+num_row_groups: 1
+format_version: 1.0
+serialized_size: 2226
+
+
+############ Columns ############
+one
+two
+three
+
+############ Column(one) ############
+name: one
+path: one
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: DOUBLE
+logical_type: None
+converted_type (legacy): NONE
+
+############ Column(two) ############
+name: two
+path: two
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BYTE_ARRAY
+logical_type: String
+converted_type (legacy): UTF8
+
+############ Column(three) ############
+name: three
+path: three
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BOOLEAN
+logical_type: None
+converted_type (legacy): NONE
+```
+</details>
+
+#### Cat CSV parquet and transform [csvq](https://github.com/mithrandie/csvq)
+
+```bash
+$ parquet-tools csv s3://bucket-name/test.parquet |csvq "select one, three where three"
++-------+-------+
+| one | three |
++-------+-------+
+| -1.0 | True |
+| 2.5 | True |
++-------+-------+
+```
+
+
+
+%package help
+# Documentation subpackage: its files section takes doclist.lst plus the doc directory.
+Summary: Development documents and examples for parquet-tools
+Provides: python3-parquet-tools-doc
+%description help
+# parquet-tools
+
+![Run Unittest](https://github.com/ktrueda/parquet-tools/workflows/Run%20Unittest/badge.svg)
+![Run CLI test](https://github.com/ktrueda/parquet-tools/workflows/Run%20CLI%20test/badge.svg)
+
+This is a pip-installable [parquet-tools](https://github.com/apache/parquet-mr).
+In other words, parquet-tools is a CLI tool built on [Apache Arrow](https://github.com/apache/arrow).
+You can show parquet file content/schema on local disk or on Amazon S3.
+It is incompatible with the original parquet-tools.
+
+## Features
+
+- Read Parquet data (local file or file on S3)
+- Read Parquet metadata/schema (local file or file on S3)
+
+## Installation
+
+```bash
+$ pip install parquet-tools
+```
+
+## Usage
+
+```bash
+$ parquet-tools --help
+usage: parquet-tools [-h] {show,csv,inspect} ...
+
+parquet CLI tools
+
+positional arguments:
+ {show,csv,inspect}
+ show Show human readble format. see `show -h`
+ csv Cat csv style. see `csv -h`
+ inspect Inspect parquet file. see `inspect -h`
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+## Usage Examples
+
+#### Show local parquet file
+
+```bash
+$ parquet-tools show test.parquet
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+#### Show parquet file on S3
+
+```bash
+$ parquet-tools show s3://bucket-name/prefix/*
++-------+-------+---------+
+| one | two | three |
+|-------+-------+---------|
+| -1 | foo | True |
+| nan | bar | False |
+| 2.5 | baz | True |
++-------+-------+---------+
+```
+
+
+#### Inspect parquet file schema
+
+```bash
+$ parquet-tools inspect /path/to/parquet
+```
+
+<details>
+
+<summary>Inspect output</summary>
+
+```
+############ file meta data ############
+created_by: parquet-cpp version 1.5.1-SNAPSHOT
+num_columns: 3
+num_rows: 3
+num_row_groups: 1
+format_version: 1.0
+serialized_size: 2226
+
+
+############ Columns ############
+one
+two
+three
+
+############ Column(one) ############
+name: one
+path: one
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: DOUBLE
+logical_type: None
+converted_type (legacy): NONE
+
+############ Column(two) ############
+name: two
+path: two
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BYTE_ARRAY
+logical_type: String
+converted_type (legacy): UTF8
+
+############ Column(three) ############
+name: three
+path: three
+max_definition_level: 1
+max_repetition_level: 0
+physical_type: BOOLEAN
+logical_type: None
+converted_type (legacy): NONE
+```
+</details>
+
+#### Cat CSV parquet and transform [csvq](https://github.com/mithrandie/csvq)
+
+```bash
+$ parquet-tools csv s3://bucket-name/test.parquet |csvq "select one, three where three"
++-------+-------+
+| one | three |
++-------+-------+
+| -1.0 | True |
+| 2.5 | True |
++-------+-------+
+```
+
+
+
+%prep
+# Set up the unpacked sources, using parquet-tools-0.2.13 as the source directory name.
+# NOTE(review): the archive is named parquet_tools-0.2.13.tar.gz (underscore) while -n
+# names parquet-tools-0.2.13 (hyphen) - confirm the tarball's top-level directory.
+%autosetup -n parquet-tools-0.2.13
+
+%build
+# Build the Python package through the py3_build macro.
+%py3_build
+
+%install
+%py3_install
+# Copy any documentation / example directories found in the source tree into the
+# package documentation directory.
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+for docdir in doc docs example examples; do
+    if [ -d "$docdir" ]; then cp -arf "$docdir" %{buildroot}/%{_pkgdocdir}; fi
+done
+pushd %{buildroot}
+# Create both manifests up front so the mv commands below cannot fail when none of
+# the scanned directories exist.
+touch filelist.lst doclist.lst
+# Record every regular file installed under the library and binary directories.
+for dir in usr/lib usr/lib64 usr/bin usr/sbin; do
+    if [ -d "$dir" ]; then
+        find "$dir" -type f -printf "/%h/%f\n" >> filelist.lst
+    fi
+done
+# Man pages are recorded with a .gz suffix appended to each file name.
+# NOTE(review): presumably because they are compressed after this section runs - confirm.
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+# Move the manifests out of the buildroot into the build directory, where the
+# files sections read them via -f.
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-parquet-tools -f filelist.lst
+# filelist.lst (written during install) lists the regular files; the entry below adds
+# the entries directly under the python3 sitelib path as directories.
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+# doclist.lst holds the man-page paths collected during install; everything under the
+# doc directory is included as well.
+%{_docdir}/*
+
+%changelog
+* Tue Apr 11 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.13-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..1a07476
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+70781b230e881eaffc978deebd10c14e parquet_tools-0.2.13.tar.gz