-rw-r--r--  .gitignore            1
-rw-r--r--  python-pythena.spec   430
-rw-r--r--  sources               1
3 files changed, 432 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..1666298 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/pythena-1.6.0.tar.gz
diff --git a/python-pythena.spec b/python-pythena.spec
new file mode 100644
index 0000000..d077bb2
--- /dev/null
+++ b/python-pythena.spec
@@ -0,0 +1,430 @@
+%global _empty_manifest_terminate_build 0
+Name: python-pythena
+Version: 1.6.0
+Release: 1
+Summary: A simple Athena wrapper leveraging boto3 to execute queries and return results, requiring only a database name and a query string
+License: Mozilla Public License Version 2.0
+URL: https://github.com/chrispruitt/pythena
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/9b/c3/70a1f81672261d101def4cc8e047b9e138f54f576eb55fc3c0c4146294f8/pythena-1.6.0.tar.gz
+BuildArch: noarch
+
+Requires: python3-pandas
+Requires: python3-boto3
+Requires: python3-botocore
+Requires: python3-retrying
+
+%description
+# pythena
+
+This is a simple Python module that lets you query Athena the same way the AWS Athena console does. It only requires a database name and a query string.
+
+## Install
+
+```bash
+pip install pythena
+```
+
+## Setup
+
+Be sure to set up your AWS authentication credentials. You can do so with the AWS CLI by running:
+
+```bash
+pip install awscli
+aws configure
+```
+
+More help on configuring the AWS CLI is available at https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html
+
+## Simple Usage
+
+```python
+import pythena
+
+athena_client = pythena.Athena("mydatabase")
+
+# Returns results as a pandas dataframe
+df = athena_client.execute("select * from mytable")
+
+print(df.sample(n=2)) # Prints 2 rows from your dataframe
+```
+
+## Connect to Database
+
+```python
+import boto3
+import pythena
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+# Connect to a database and override default aws region in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1')
+# Connect to a database and override default profile in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session())
+
+```
+
+## athena_client.execute()
+
+```
+execute(
+  query='SQL_QUERY', # Required
+  s3_output_url='FULL_S3_PATH', # Optional (format example: 's3://mybucket/mydir')
+  save_results=True | False, # Optional. Defaults to True only when 's3_output_url' is provided. If True, the s3 results are not deleted and a tuple containing the execution_id is returned.
+  run_async=True | False, # Optional. If True, runs the query asynchronously and returns only the execution_id; use get_result(execution_id) to fetch the results when the query finishes.
+  workgroup='primary' # Optional. Defaults to the 'primary' workgroup.
+)
+```
+
+Note: `execute()` returns a tuple `(dataframe, execution_id)` unless `run_async=True`, in which case it returns only the execution_id.
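+
+For example, a minimal sketch of handling both return shapes (the database and table names are placeholders):
+
+```python
+import pythena
+
+athena_client = pythena.Athena(database="mydatabase")
+
+# Synchronous call: returns (dataframe, execution_id)
+df, execution_id = athena_client.execute(query="select * from mytable")
+
+# Asynchronous call: returns only the execution_id; fetch the dataframe when the query finishes
+execution_id = athena_client.execute(query="select * from mytable", run_async=True)
+df = athena_client.get_result(execution_id)
+```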
+
+## Full Usage Examples
+
+```python
+import boto3
+import pythena
+
+# Prints out all databases listed in the glue catalog
+pythena.print_databases()
+pythena.print_databases(region='us-east-1') # Overrides default region
+pythena.print_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Gets all databases and returns as a list
+pythena.get_databases()
+pythena.get_databases(region='us-east-1') # Overrides default region
+pythena.get_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1') # Overrides default region
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session()) # Overrides default profile
+
+# Prints out all tables in a database
+athena_client.print_tables()
+
+# Gets all tables in the database you are connected to and returns as a list
+athena_client.get_tables()
+
+# Execute a query, returns tuple with dataframe and athena execution_id
+dataframe, _ = athena_client.execute(query="select * from my_table") # Results are returned as a dataframe
+
+# Execute a query and save results to s3
+dataframe, execution_id = athena_client.execute(query="select * from my_table", s3_output_url="s3://mybucket/mydir") # Results are returned as a dataframe
+
+# Get Execution Id and save results
+dataframe, execution_id = athena_client.execute(query="select * from my_table", save_results=True)
+
+# Execute a query asynchronously
+execution_id = athena_client.execute(query="select * from my_table", run_async=True) # Returns just the execution id
+dataframe = athena_client.get_result(execution_id) # Will report errors if query failed or let you know if it is still running
+
+# With asynchronous queries, you can check the status, get the error, or cancel the query
+pythena.get_query_status(execution_id)
+pythena.get_query_error(execution_id)
+pythena.cancel_query(execution_id)
+
+```
+
+## Note
+
+By default, when executing Athena queries via boto3 or the AWS Athena console, the results are saved in an s3 bucket. By default this module, assuming a successful execution, deletes the s3 result file to keep s3 clean. If an s3_output_url is provided, the results are saved to that location and are not deleted.
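+
+A minimal sketch of the two behaviors described above (the database, table, bucket, and prefix names are placeholders):
+
+```python
+import pythena
+
+athena_client = pythena.Athena(database="mydatabase")
+
+# Default: the s3 result file is deleted after a successful execution
+df, _ = athena_client.execute(query="select * from mytable")
+
+# With s3_output_url: results are written to the given location and kept
+df, execution_id = athena_client.execute(query="select * from mytable",
+                                         s3_output_url="s3://mybucket/mydir")
+```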
+
+
+
+
+%package -n python3-pythena
+Summary: A simple Athena wrapper leveraging boto3 to execute queries and return results, requiring only a database name and a query string
+Provides: python-pythena
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-pythena
+# pythena
+
+This is a simple Python module that lets you query Athena the same way the AWS Athena console does. It only requires a database name and a query string.
+
+## Install
+
+```bash
+pip install pythena
+```
+
+## Setup
+
+Be sure to set up your AWS authentication credentials. You can do so with the AWS CLI by running:
+
+```bash
+pip install awscli
+aws configure
+```
+
+More help on configuring the AWS CLI is available at https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html
+
+## Simple Usage
+
+```python
+import pythena
+
+athena_client = pythena.Athena("mydatabase")
+
+# Returns results as a pandas dataframe
+df = athena_client.execute("select * from mytable")
+
+print(df.sample(n=2)) # Prints 2 rows from your dataframe
+```
+
+## Connect to Database
+
+```python
+import boto3
+import pythena
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+# Connect to a database and override default aws region in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1')
+# Connect to a database and override default profile in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session())
+
+```
+
+## athena_client.execute()
+
+```
+execute(
+  query='SQL_QUERY', # Required
+  s3_output_url='FULL_S3_PATH', # Optional (format example: 's3://mybucket/mydir')
+  save_results=True | False, # Optional. Defaults to True only when 's3_output_url' is provided. If True, the s3 results are not deleted and a tuple containing the execution_id is returned.
+  run_async=True | False, # Optional. If True, runs the query asynchronously and returns only the execution_id; use get_result(execution_id) to fetch the results when the query finishes.
+  workgroup='primary' # Optional. Defaults to the 'primary' workgroup.
+)
+```
+
+Note: `execute()` returns a tuple `(dataframe, execution_id)` unless `run_async=True`, in which case it returns only the execution_id.
+
+## Full Usage Examples
+
+```python
+import boto3
+import pythena
+
+# Prints out all databases listed in the glue catalog
+pythena.print_databases()
+pythena.print_databases(region='us-east-1') # Overrides default region
+pythena.print_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Gets all databases and returns as a list
+pythena.get_databases()
+pythena.get_databases(region='us-east-1') # Overrides default region
+pythena.get_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1') # Overrides default region
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session()) # Overrides default profile
+
+# Prints out all tables in a database
+athena_client.print_tables()
+
+# Gets all tables in the database you are connected to and returns as a list
+athena_client.get_tables()
+
+# Execute a query, returns tuple with dataframe and athena execution_id
+dataframe, _ = athena_client.execute(query="select * from my_table") # Results are returned as a dataframe
+
+# Execute a query and save results to s3
+dataframe, execution_id = athena_client.execute(query="select * from my_table", s3_output_url="s3://mybucket/mydir") # Results are returned as a dataframe
+
+# Get Execution Id and save results
+dataframe, execution_id = athena_client.execute(query="select * from my_table", save_results=True)
+
+# Execute a query asynchronously
+execution_id = athena_client.execute(query="select * from my_table", run_async=True) # Returns just the execution id
+dataframe = athena_client.get_result(execution_id) # Will report errors if query failed or let you know if it is still running
+
+# With asynchronous queries, you can check the status, get the error, or cancel the query
+pythena.get_query_status(execution_id)
+pythena.get_query_error(execution_id)
+pythena.cancel_query(execution_id)
+
+```
+
+## Note
+
+By default, when executing Athena queries via boto3 or the AWS Athena console, the results are saved in an s3 bucket. By default this module, assuming a successful execution, deletes the s3 result file to keep s3 clean. If an s3_output_url is provided, the results are saved to that location and are not deleted.
+
+
+
+
+%package help
+Summary: Development documents and examples for pythena
+Provides: python3-pythena-doc
+%description help
+# pythena
+
+This is a simple Python module that lets you query Athena the same way the AWS Athena console does. It only requires a database name and a query string.
+
+## Install
+
+```bash
+pip install pythena
+```
+
+## Setup
+
+Be sure to set up your AWS authentication credentials. You can do so with the AWS CLI by running:
+
+```bash
+pip install awscli
+aws configure
+```
+
+More help on configuring the AWS CLI is available at https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html
+
+## Simple Usage
+
+```python
+import pythena
+
+athena_client = pythena.Athena("mydatabase")
+
+# Returns results as a pandas dataframe
+df = athena_client.execute("select * from mytable")
+
+print(df.sample(n=2)) # Prints 2 rows from your dataframe
+```
+
+## Connect to Database
+
+```python
+import boto3
+import pythena
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+# Connect to a database and override default aws region in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1')
+# Connect to a database and override default profile in your aws configuration
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session())
+
+```
+
+## athena_client.execute()
+
+```
+execute(
+  query='SQL_QUERY', # Required
+  s3_output_url='FULL_S3_PATH', # Optional (format example: 's3://mybucket/mydir')
+  save_results=True | False, # Optional. Defaults to True only when 's3_output_url' is provided. If True, the s3 results are not deleted and a tuple containing the execution_id is returned.
+  run_async=True | False, # Optional. If True, runs the query asynchronously and returns only the execution_id; use get_result(execution_id) to fetch the results when the query finishes.
+  workgroup='primary' # Optional. Defaults to the 'primary' workgroup.
+)
+```
+
+Note: `execute()` returns a tuple `(dataframe, execution_id)` unless `run_async=True`, in which case it returns only the execution_id.
+
+## Full Usage Examples
+
+```python
+import boto3
+import pythena
+
+# Prints out all databases listed in the glue catalog
+pythena.print_databases()
+pythena.print_databases(region='us-east-1') # Overrides default region
+pythena.print_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Gets all databases and returns as a list
+pythena.get_databases()
+pythena.get_databases(region='us-east-1') # Overrides default region
+pythena.get_databases(session=boto3.session.Session()) # Overrides default profile
+
+# Connect to a database
+athena_client = pythena.Athena(database="mydatabase")
+athena_client = pythena.Athena(database="mydatabase", region='us-east-1') # Overrides default region
+athena_client = pythena.Athena(database="mydatabase", session=boto3.session.Session()) # Overrides default profile
+
+# Prints out all tables in a database
+athena_client.print_tables()
+
+# Gets all tables in the database you are connected to and returns as a list
+athena_client.get_tables()
+
+# Execute a query, returns tuple with dataframe and athena execution_id
+dataframe, _ = athena_client.execute(query="select * from my_table") # Results are returned as a dataframe
+
+# Execute a query and save results to s3
+dataframe, execution_id = athena_client.execute(query="select * from my_table", s3_output_url="s3://mybucket/mydir") # Results are returned as a dataframe
+
+# Get Execution Id and save results
+dataframe, execution_id = athena_client.execute(query="select * from my_table", save_results=True)
+
+# Execute a query asynchronously
+execution_id = athena_client.execute(query="select * from my_table", run_async=True) # Returns just the execution id
+dataframe = athena_client.get_result(execution_id) # Will report errors if query failed or let you know if it is still running
+
+# With asynchronous queries, you can check the status, get the error, or cancel the query
+pythena.get_query_status(execution_id)
+pythena.get_query_error(execution_id)
+pythena.cancel_query(execution_id)
+
+```
+
+## Note
+
+By default, when executing Athena queries via boto3 or the AWS Athena console, the results are saved in an s3 bucket. By default this module, assuming a successful execution, deletes the s3 result file to keep s3 clean. If an s3_output_url is provided, the results are saved to that location and are not deleted.
+
+
+
+
+%prep
+%autosetup -n pythena-1.6.0
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+ find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+ find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-pythena -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Apr 11 2023 Python_Bot <Python_Bot@openeuler.org> - 1.6.0-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..093b0f0
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+cd0d1fd8e1487a46932a43e97e5e816a pythena-1.6.0.tar.gz